init commit

89eb5e4b · wangsen · 89eb5e4b · 89eb5e4b · 89eb5e4b · 89eb5e4b
Commit 89eb5e4b authored Aug 07, 2024 by wangsen
20 changed files
--- a/StyleText/arch/__init__.py
+++ b/StyleText/arch/__init__.py
--- a/StyleText/arch/base_module.py
+++ b/StyleText/arch/base_module.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddle
+import paddle.nn as nn
+from arch.spectral_norm import spectral_norm
+class CBN(nn.Layer):
+    def __init__(self,
+                 name,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 use_bias=False,
+                 norm_layer=None,
+                 act=None,
+                 act_attr=None):
+        super(CBN, self).__init__()
+        if use_bias:
+            bias_attr = paddle.ParamAttr(name=name + "_bias")
+        else:
+            bias_attr = None
+        self._conv = paddle.nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+            weight_attr=paddle.ParamAttr(name=name + "_weights"),
+            bias_attr=bias_attr)
+        if norm_layer:
+            self._norm_layer = getattr(paddle.nn, norm_layer)(
+                num_features=out_channels, name=name + "_bn")
+        else:
+            self._norm_layer = None
+        if act:
+            if act_attr:
+                self._act = getattr(paddle.nn, act)(**act_attr,
+                                                    name=name + "_" + act)
+            else:
+                self._act = getattr(paddle.nn, act)(name=name + "_" + act)
+        else:
+            self._act = None
+    def forward(self, x):
+        out = self._conv(x)
+        if self._norm_layer:
+            out = self._norm_layer(out)
+        if self._act:
+            out = self._act(out)
+        return out
+class SNConv(nn.Layer):
+    def __init__(self,
+                 name,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 use_bias=False,
+                 norm_layer=None,
+                 act=None,
+                 act_attr=None):
+        super(SNConv, self).__init__()
+        if use_bias:
+            bias_attr = paddle.ParamAttr(name=name + "_bias")
+        else:
+            bias_attr = None
+        self._sn_conv = spectral_norm(
+            paddle.nn.Conv2D(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=kernel_size,
+                stride=stride,
+                padding=padding,
+                dilation=dilation,
+                groups=groups,
+                weight_attr=paddle.ParamAttr(name=name + "_weights"),
+                bias_attr=bias_attr))
+        if norm_layer:
+            self._norm_layer = getattr(paddle.nn, norm_layer)(
+                num_features=out_channels, name=name + "_bn")
+        else:
+            self._norm_layer = None
+        if act:
+            if act_attr:
+                self._act = getattr(paddle.nn, act)(**act_attr,
+                                                    name=name + "_" + act)
+            else:
+                self._act = getattr(paddle.nn, act)(name=name + "_" + act)
+        else:
+            self._act = None
+    def forward(self, x):
+        out = self._sn_conv(x)
+        if self._norm_layer:
+            out = self._norm_layer(out)
+        if self._act:
+            out = self._act(out)
+        return out
+class SNConvTranspose(nn.Layer):
+    def __init__(self,
+                 name,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 output_padding=0,
+                 dilation=1,
+                 groups=1,
+                 use_bias=False,
+                 norm_layer=None,
+                 act=None,
+                 act_attr=None):
+        super(SNConvTranspose, self).__init__()
+        if use_bias:
+            bias_attr = paddle.ParamAttr(name=name + "_bias")
+        else:
+            bias_attr = None
+        self._sn_conv_transpose = spectral_norm(
+            paddle.nn.Conv2DTranspose(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=kernel_size,
+                stride=stride,
+                padding=padding,
+                output_padding=output_padding,
+                dilation=dilation,
+                groups=groups,
+                weight_attr=paddle.ParamAttr(name=name + "_weights"),
+                bias_attr=bias_attr))
+        if norm_layer:
+            self._norm_layer = getattr(paddle.nn, norm_layer)(
+                num_features=out_channels, name=name + "_bn")
+        else:
+            self._norm_layer = None
+        if act:
+            if act_attr:
+                self._act = getattr(paddle.nn, act)(**act_attr,
+                                                    name=name + "_" + act)
+            else:
+                self._act = getattr(paddle.nn, act)(name=name + "_" + act)
+        else:
+            self._act = None
+    def forward(self, x):
+        out = self._sn_conv_transpose(x)
+        if self._norm_layer:
+            out = self._norm_layer(out)
+        if self._act:
+            out = self._act(out)
+        return out
+class MiddleNet(nn.Layer):
+    def __init__(self, name, in_channels, mid_channels, out_channels,
+                 use_bias):
+        super(MiddleNet, self).__init__()
+        self._sn_conv1 = SNConv(
+            name=name + "_sn_conv1",
+            in_channels=in_channels,
+            out_channels=mid_channels,
+            kernel_size=1,
+            use_bias=use_bias,
+            norm_layer=None,
+            act=None)
+        self._pad2d = nn.Pad2D(padding=[1, 1, 1, 1], mode="replicate")
+        self._sn_conv2 = SNConv(
+            name=name + "_sn_conv2",
+            in_channels=mid_channels,
+            out_channels=mid_channels,
+            kernel_size=3,
+            use_bias=use_bias)
+        self._sn_conv3 = SNConv(
+            name=name + "_sn_conv3",
+            in_channels=mid_channels,
+            out_channels=out_channels,
+            kernel_size=1,
+            use_bias=use_bias)
+    def forward(self, x):
+        sn_conv1 = self._sn_conv1.forward(x)
+        pad_2d = self._pad2d.forward(sn_conv1)
+        sn_conv2 = self._sn_conv2.forward(pad_2d)
+        sn_conv3 = self._sn_conv3.forward(sn_conv2)
+        return sn_conv3
+class ResBlock(nn.Layer):
+    def __init__(self, name, channels, norm_layer, use_dropout, use_dilation,
+                 use_bias):
+        super(ResBlock, self).__init__()
+        if use_dilation:
+            padding_mat = [1, 1, 1, 1]
+        else:
+            padding_mat = [0, 0, 0, 0]
+        self._pad1 = nn.Pad2D(padding_mat, mode="replicate")
+        self._sn_conv1 = SNConv(
+            name=name + "_sn_conv1",
+            in_channels=channels,
+            out_channels=channels,
+            kernel_size=3,
+            padding=0,
+            norm_layer=norm_layer,
+            use_bias=use_bias,
+            act="ReLU",
+            act_attr=None)
+        if use_dropout:
+            self._dropout = nn.Dropout(0.5)
+        else:
+            self._dropout = None
+        self._pad2 = nn.Pad2D([1, 1, 1, 1], mode="replicate")
+        self._sn_conv2 = SNConv(
+            name=name + "_sn_conv2",
+            in_channels=channels,
+            out_channels=channels,
+            kernel_size=3,
+            norm_layer=norm_layer,
+            use_bias=use_bias,
+            act="ReLU",
+            act_attr=None)
+    def forward(self, x):
+        pad1 = self._pad1.forward(x)
+        sn_conv1 = self._sn_conv1.forward(pad1)
+        pad2 = self._pad2.forward(sn_conv1)
+        sn_conv2 = self._sn_conv2.forward(pad2)
+        return sn_conv2 + x
--- a/StyleText/arch/decoder.py
+++ b/StyleText/arch/decoder.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddle
+import paddle.nn as nn
+from arch.base_module import SNConv, SNConvTranspose, ResBlock
+class Decoder(nn.Layer):
+    def __init__(self, name, encode_dim, out_channels, use_bias, norm_layer,
+                 act, act_attr, conv_block_dropout, conv_block_num,
+                 conv_block_dilation, out_conv_act, out_conv_act_attr):
+        super(Decoder, self).__init__()
+        conv_blocks = []
+        for i in range(conv_block_num):
+            conv_blocks.append(
+                ResBlock(
+                    name="{}_conv_block_{}".format(name, i),
+                    channels=encode_dim * 8,
+                    norm_layer=norm_layer,
+                    use_dropout=conv_block_dropout,
+                    use_dilation=conv_block_dilation,
+                    use_bias=use_bias))
+        self.conv_blocks = nn.Sequential(*conv_blocks)
+        self._up1 = SNConvTranspose(
+            name=name + "_up1",
+            in_channels=encode_dim * 8,
+            out_channels=encode_dim * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._up2 = SNConvTranspose(
+            name=name + "_up2",
+            in_channels=encode_dim * 4,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._up3 = SNConvTranspose(
+            name=name + "_up3",
+            in_channels=encode_dim * 2,
+            out_channels=encode_dim,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._pad2d = paddle.nn.Pad2D([1, 1, 1, 1], mode="replicate")
+        self._out_conv = SNConv(
+            name=name + "_out_conv",
+            in_channels=encode_dim,
+            out_channels=out_channels,
+            kernel_size=3,
+            use_bias=use_bias,
+            norm_layer=None,
+            act=out_conv_act,
+            act_attr=out_conv_act_attr)
+    def forward(self, x):
+        if isinstance(x, (list, tuple)):
+            x = paddle.concat(x, axis=1)
+        output_dict = dict()
+        output_dict["conv_blocks"] = self.conv_blocks.forward(x)
+        output_dict["up1"] = self._up1.forward(output_dict["conv_blocks"])
+        output_dict["up2"] = self._up2.forward(output_dict["up1"])
+        output_dict["up3"] = self._up3.forward(output_dict["up2"])
+        output_dict["pad2d"] = self._pad2d.forward(output_dict["up3"])
+        output_dict["out_conv"] = self._out_conv.forward(output_dict["pad2d"])
+        return output_dict
+class DecoderUnet(nn.Layer):
+    def __init__(self, name, encode_dim, out_channels, use_bias, norm_layer,
+                 act, act_attr, conv_block_dropout, conv_block_num,
+                 conv_block_dilation, out_conv_act, out_conv_act_attr):
+        super(DecoderUnet, self).__init__()
+        conv_blocks = []
+        for i in range(conv_block_num):
+            conv_blocks.append(
+                ResBlock(
+                    name="{}_conv_block_{}".format(name, i),
+                    channels=encode_dim * 8,
+                    norm_layer=norm_layer,
+                    use_dropout=conv_block_dropout,
+                    use_dilation=conv_block_dilation,
+                    use_bias=use_bias))
+        self._conv_blocks = nn.Sequential(*conv_blocks)
+        self._up1 = SNConvTranspose(
+            name=name + "_up1",
+            in_channels=encode_dim * 8,
+            out_channels=encode_dim * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._up2 = SNConvTranspose(
+            name=name + "_up2",
+            in_channels=encode_dim * 8,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._up3 = SNConvTranspose(
+            name=name + "_up3",
+            in_channels=encode_dim * 4,
+            out_channels=encode_dim,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._pad2d = paddle.nn.Pad2D([1, 1, 1, 1], mode="replicate")
+        self._out_conv = SNConv(
+            name=name + "_out_conv",
+            in_channels=encode_dim,
+            out_channels=out_channels,
+            kernel_size=3,
+            use_bias=use_bias,
+            norm_layer=None,
+            act=out_conv_act,
+            act_attr=out_conv_act_attr)
+    def forward(self, x, y, feature2, feature1):
+        output_dict = dict()
+        output_dict["conv_blocks"] = self._conv_blocks(
+            paddle.concat(
+                (x, y), axis=1))
+        output_dict["up1"] = self._up1.forward(output_dict["conv_blocks"])
+        output_dict["up2"] = self._up2.forward(
+            paddle.concat(
+                (output_dict["up1"], feature2), axis=1))
+        output_dict["up3"] = self._up3.forward(
+            paddle.concat(
+                (output_dict["up2"], feature1), axis=1))
+        output_dict["pad2d"] = self._pad2d.forward(output_dict["up3"])
+        output_dict["out_conv"] = self._out_conv.forward(output_dict["pad2d"])
+        return output_dict
+class SingleDecoder(nn.Layer):
+    def __init__(self, name, encode_dim, out_channels, use_bias, norm_layer,
+                 act, act_attr, conv_block_dropout, conv_block_num,
+                 conv_block_dilation, out_conv_act, out_conv_act_attr):
+        super(SingleDecoder, self).__init__()
+        conv_blocks = []
+        for i in range(conv_block_num):
+            conv_blocks.append(
+                ResBlock(
+                    name="{}_conv_block_{}".format(name, i),
+                    channels=encode_dim * 4,
+                    norm_layer=norm_layer,
+                    use_dropout=conv_block_dropout,
+                    use_dilation=conv_block_dilation,
+                    use_bias=use_bias))
+        self._conv_blocks = nn.Sequential(*conv_blocks)
+        self._up1 = SNConvTranspose(
+            name=name + "_up1",
+            in_channels=encode_dim * 4,
+            out_channels=encode_dim * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._up2 = SNConvTranspose(
+            name=name + "_up2",
+            in_channels=encode_dim * 8,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._up3 = SNConvTranspose(
+            name=name + "_up3",
+            in_channels=encode_dim * 4,
+            out_channels=encode_dim,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._pad2d = paddle.nn.Pad2D([1, 1, 1, 1], mode="replicate")
+        self._out_conv = SNConv(
+            name=name + "_out_conv",
+            in_channels=encode_dim,
+            out_channels=out_channels,
+            kernel_size=3,
+            use_bias=use_bias,
+            norm_layer=None,
+            act=out_conv_act,
+            act_attr=out_conv_act_attr)
+    def forward(self, x, feature2, feature1):
+        output_dict = dict()
+        output_dict["conv_blocks"] = self._conv_blocks.forward(x)
+        output_dict["up1"] = self._up1.forward(output_dict["conv_blocks"])
+        output_dict["up2"] = self._up2.forward(
+            paddle.concat(
+                (output_dict["up1"], feature2), axis=1))
+        output_dict["up3"] = self._up3.forward(
+            paddle.concat(
+                (output_dict["up2"], feature1), axis=1))
+        output_dict["pad2d"] = self._pad2d.forward(output_dict["up3"])
+        output_dict["out_conv"] = self._out_conv.forward(output_dict["pad2d"])
+        return output_dict
--- a/StyleText/arch/encoder.py
+++ b/StyleText/arch/encoder.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddle
+import paddle.nn as nn
+from arch.base_module import SNConv, SNConvTranspose, ResBlock
+class Encoder(nn.Layer):
+    def __init__(self, name, in_channels, encode_dim, use_bias, norm_layer,
+                 act, act_attr, conv_block_dropout, conv_block_num,
+                 conv_block_dilation):
+        super(Encoder, self).__init__()
+        self._pad2d = paddle.nn.Pad2D([3, 3, 3, 3], mode="replicate")
+        self._in_conv = SNConv(
+            name=name + "_in_conv",
+            in_channels=in_channels,
+            out_channels=encode_dim,
+            kernel_size=7,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._down1 = SNConv(
+            name=name + "_down1",
+            in_channels=encode_dim,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._down2 = SNConv(
+            name=name + "_down2",
+            in_channels=encode_dim * 2,
+            out_channels=encode_dim * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._down3 = SNConv(
+            name=name + "_down3",
+            in_channels=encode_dim * 4,
+            out_channels=encode_dim * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        conv_blocks = []
+        for i in range(conv_block_num):
+            conv_blocks.append(
+                ResBlock(
+                    name="{}_conv_block_{}".format(name, i),
+                    channels=encode_dim * 4,
+                    norm_layer=norm_layer,
+                    use_dropout=conv_block_dropout,
+                    use_dilation=conv_block_dilation,
+                    use_bias=use_bias))
+        self._conv_blocks = nn.Sequential(*conv_blocks)
+    def forward(self, x):
+        out_dict = dict()
+        x = self._pad2d(x)
+        out_dict["in_conv"] = self._in_conv.forward(x)
+        out_dict["down1"] = self._down1.forward(out_dict["in_conv"])
+        out_dict["down2"] = self._down2.forward(out_dict["down1"])
+        out_dict["down3"] = self._down3.forward(out_dict["down2"])
+        out_dict["res_blocks"] = self._conv_blocks.forward(out_dict["down3"])
+        return out_dict
+class EncoderUnet(nn.Layer):
+    def __init__(self, name, in_channels, encode_dim, use_bias, norm_layer,
+                 act, act_attr):
+        super(EncoderUnet, self).__init__()
+        self._pad2d = paddle.nn.Pad2D([3, 3, 3, 3], mode="replicate")
+        self._in_conv = SNConv(
+            name=name + "_in_conv",
+            in_channels=in_channels,
+            out_channels=encode_dim,
+            kernel_size=7,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._down1 = SNConv(
+            name=name + "_down1",
+            in_channels=encode_dim,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._down2 = SNConv(
+            name=name + "_down2",
+            in_channels=encode_dim * 2,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._down3 = SNConv(
+            name=name + "_down3",
+            in_channels=encode_dim * 2,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._down4 = SNConv(
+            name=name + "_down4",
+            in_channels=encode_dim * 2,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._up1 = SNConvTranspose(
+            name=name + "_up1",
+            in_channels=encode_dim * 2,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._up2 = SNConvTranspose(
+            name=name + "_up2",
+            in_channels=encode_dim * 4,
+            out_channels=encode_dim * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+    def forward(self, x):
+        output_dict = dict()
+        x = self._pad2d(x)
+        output_dict['in_conv'] = self._in_conv.forward(x)
+        output_dict['down1'] = self._down1.forward(output_dict['in_conv'])
+        output_dict['down2'] = self._down2.forward(output_dict['down1'])
+        output_dict['down3'] = self._down3.forward(output_dict['down2'])
+        output_dict['down4'] = self._down4.forward(output_dict['down3'])
+        output_dict['up1'] = self._up1.forward(output_dict['down4'])
+        output_dict['up2'] = self._up2.forward(
+            paddle.concat(
+                (output_dict['down3'], output_dict['up1']), axis=1))
+        output_dict['concat'] = paddle.concat(
+            (output_dict['down2'], output_dict['up2']), axis=1)
+        return output_dict
--- a/StyleText/arch/spectral_norm.py
+++ b/StyleText/arch/spectral_norm.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+def normal_(x, mean=0., std=1.):
+    temp_value = paddle.normal(mean, std, shape=x.shape)
+    x.set_value(temp_value)
+    return x
+class SpectralNorm(object):
+    def __init__(self, name='weight', n_power_iterations=1, dim=0, eps=1e-12):
+        self.name = name
+        self.dim = dim
+        if n_power_iterations <= 0:
+            raise ValueError('Expected n_power_iterations to be positive, but '
+                             'got n_power_iterations={}'.format(
+                                 n_power_iterations))
+        self.n_power_iterations = n_power_iterations
+        self.eps = eps
+    def reshape_weight_to_matrix(self, weight):
+        weight_mat = weight
+        if self.dim != 0:
+            # transpose dim to front
+            weight_mat = weight_mat.transpose([
+                self.dim,
+                * [d for d in range(weight_mat.dim()) if d != self.dim]
+            ])
+        height = weight_mat.shape[0]
+        return weight_mat.reshape([height, -1])
+    def compute_weight(self, module, do_power_iteration):
+        weight = getattr(module, self.name + '_orig')
+        u = getattr(module, self.name + '_u')
+        v = getattr(module, self.name + '_v')
+        weight_mat = self.reshape_weight_to_matrix(weight)
+        if do_power_iteration:
+            with paddle.no_grad():
+                for _ in range(self.n_power_iterations):
+                    v.set_value(
+                        F.normalize(
+                            paddle.matmul(
+                                weight_mat,
+                                u,
+                                transpose_x=True,
+                                transpose_y=False),
+                            axis=0,
+                            epsilon=self.eps, ))
+                    u.set_value(
+                        F.normalize(
+                            paddle.matmul(weight_mat, v),
+                            axis=0,
+                            epsilon=self.eps, ))
+                if self.n_power_iterations > 0:
+                    u = u.clone()
+                    v = v.clone()
+        sigma = paddle.dot(u, paddle.mv(weight_mat, v))
+        weight = weight / sigma
+        return weight
+    def remove(self, module):
+        with paddle.no_grad():
+            weight = self.compute_weight(module, do_power_iteration=False)
+        delattr(module, self.name)
+        delattr(module, self.name + '_u')
+        delattr(module, self.name + '_v')
+        delattr(module, self.name + '_orig')
+        module.add_parameter(self.name, weight.detach())
+    def __call__(self, module, inputs):
+        setattr(
+            module,
+            self.name,
+            self.compute_weight(
+                module, do_power_iteration=module.training))
+    @staticmethod
+    def apply(module, name, n_power_iterations, dim, eps):
+        for k, hook in module._forward_pre_hooks.items():
+            if isinstance(hook, SpectralNorm) and hook.name == name:
+                raise RuntimeError(
+                    "Cannot register two spectral_norm hooks on "
+                    "the same parameter {}".format(name))
+        fn = SpectralNorm(name, n_power_iterations, dim, eps)
+        weight = module._parameters[name]
+        with paddle.no_grad():
+            weight_mat = fn.reshape_weight_to_matrix(weight)
+            h, w = weight_mat.shape
+            # randomly initialize u and v
+            u = module.create_parameter([h])
+            u = normal_(u, 0., 1.)
+            v = module.create_parameter([w])
+            v = normal_(v, 0., 1.)
+            u = F.normalize(u, axis=0, epsilon=fn.eps)
+            v = F.normalize(v, axis=0, epsilon=fn.eps)
+        # delete fn.name form parameters, otherwise you can not set attribute
+        del module._parameters[fn.name]
+        module.add_parameter(fn.name + "_orig", weight)
+        # still need to assign weight back as fn.name because all sorts of
+        # things may assume that it exists, e.g., when initializing weights.
+        # However, we can't directly assign as it could be an Parameter and
+        # gets added as a parameter. Instead, we register weight * 1.0 as a plain
+        # attribute.
+        setattr(module, fn.name, weight * 1.0)
+        module.register_buffer(fn.name + "_u", u)
+        module.register_buffer(fn.name + "_v", v)
+        module.register_forward_pre_hook(fn)
+        return fn
+def spectral_norm(module,
+                  name='weight',
+                  n_power_iterations=1,
+                  eps=1e-12,
+                  dim=None):
+    if dim is None:
+        if isinstance(module, (nn.Conv1DTranspose, nn.Conv2DTranspose,
+                               nn.Conv3DTranspose, nn.Linear)):
+            dim = 1
+        else:
+            dim = 0
+    SpectralNorm.apply(module, name, n_power_iterations, dim, eps)
+    return module
--- a/StyleText/arch/style_text_rec.py
+++ b/StyleText/arch/style_text_rec.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddle
+import paddle.nn as nn
+from arch.base_module import MiddleNet, ResBlock
+from arch.encoder import Encoder
+from arch.decoder import Decoder, DecoderUnet, SingleDecoder
+from utils.load_params import load_dygraph_pretrain
+from utils.logging import get_logger
+class StyleTextRec(nn.Layer):
+    def __init__(self, config):
+        super(StyleTextRec, self).__init__()
+        self.logger = get_logger()
+        self.text_generator = TextGenerator(config["Predictor"][
+            "text_generator"])
+        self.bg_generator = BgGeneratorWithMask(config["Predictor"][
+            "bg_generator"])
+        self.fusion_generator = FusionGeneratorSimple(config["Predictor"][
+            "fusion_generator"])
+        bg_generator_pretrain = config["Predictor"]["bg_generator"]["pretrain"]
+        text_generator_pretrain = config["Predictor"]["text_generator"][
+            "pretrain"]
+        fusion_generator_pretrain = config["Predictor"]["fusion_generator"][
+            "pretrain"]
+        load_dygraph_pretrain(
+            self.bg_generator,
+            self.logger,
+            path=bg_generator_pretrain,
+            load_static_weights=False)
+        load_dygraph_pretrain(
+            self.text_generator,
+            self.logger,
+            path=text_generator_pretrain,
+            load_static_weights=False)
+        load_dygraph_pretrain(
+            self.fusion_generator,
+            self.logger,
+            path=fusion_generator_pretrain,
+            load_static_weights=False)
+    def forward(self, style_input, text_input):
+        text_gen_output = self.text_generator.forward(style_input, text_input)
+        fake_text = text_gen_output["fake_text"]
+        fake_sk = text_gen_output["fake_sk"]
+        bg_gen_output = self.bg_generator.forward(style_input)
+        bg_encode_feature = bg_gen_output["bg_encode_feature"]
+        bg_decode_feature1 = bg_gen_output["bg_decode_feature1"]
+        bg_decode_feature2 = bg_gen_output["bg_decode_feature2"]
+        fake_bg = bg_gen_output["fake_bg"]
+        fusion_gen_output = self.fusion_generator.forward(fake_text, fake_bg)
+        fake_fusion = fusion_gen_output["fake_fusion"]
+        return {
+            "fake_fusion": fake_fusion,
+            "fake_text": fake_text,
+            "fake_sk": fake_sk,
+            "fake_bg": fake_bg,
+        }
+class TextGenerator(nn.Layer):
+    def __init__(self, config):
+        super(TextGenerator, self).__init__()
+        name = config["module_name"]
+        encode_dim = config["encode_dim"]
+        norm_layer = config["norm_layer"]
+        conv_block_dropout = config["conv_block_dropout"]
+        conv_block_num = config["conv_block_num"]
+        conv_block_dilation = config["conv_block_dilation"]
+        if norm_layer == "InstanceNorm2D":
+            use_bias = True
+        else:
+            use_bias = False
+        self.encoder_text = Encoder(
+            name=name + "_encoder_text",
+            in_channels=3,
+            encode_dim=encode_dim,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act="ReLU",
+            act_attr=None,
+            conv_block_dropout=conv_block_dropout,
+            conv_block_num=conv_block_num,
+            conv_block_dilation=conv_block_dilation)
+        self.encoder_style = Encoder(
+            name=name + "_encoder_style",
+            in_channels=3,
+            encode_dim=encode_dim,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act="ReLU",
+            act_attr=None,
+            conv_block_dropout=conv_block_dropout,
+            conv_block_num=conv_block_num,
+            conv_block_dilation=conv_block_dilation)
+        self.decoder_text = Decoder(
+            name=name + "_decoder_text",
+            encode_dim=encode_dim,
+            out_channels=int(encode_dim / 2),
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act="ReLU",
+            act_attr=None,
+            conv_block_dropout=conv_block_dropout,
+            conv_block_num=conv_block_num,
+            conv_block_dilation=conv_block_dilation,
+            out_conv_act="Tanh",
+            out_conv_act_attr=None)
+        self.decoder_sk = Decoder(
+            name=name + "_decoder_sk",
+            encode_dim=encode_dim,
+            out_channels=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act="ReLU",
+            act_attr=None,
+            conv_block_dropout=conv_block_dropout,
+            conv_block_num=conv_block_num,
+            conv_block_dilation=conv_block_dilation,
+            out_conv_act="Sigmoid",
+            out_conv_act_attr=None)
+        self.middle = MiddleNet(
+            name=name + "_middle_net",
+            in_channels=int(encode_dim / 2) + 1,
+            mid_channels=encode_dim,
+            out_channels=3,
+            use_bias=use_bias)
+    def forward(self, style_input, text_input):
+        style_feature = self.encoder_style.forward(style_input)["res_blocks"]
+        text_feature = self.encoder_text.forward(text_input)["res_blocks"]
+        fake_c_temp = self.decoder_text.forward([text_feature,
+                                                 style_feature])["out_conv"]
+        fake_sk = self.decoder_sk.forward([text_feature,
+                                           style_feature])["out_conv"]
+        fake_text = self.middle(paddle.concat((fake_c_temp, fake_sk), axis=1))
+        return {"fake_sk": fake_sk, "fake_text": fake_text}
+class BgGeneratorWithMask(nn.Layer):
+    def __init__(self, config):
+        super(BgGeneratorWithMask, self).__init__()
+        name = config["module_name"]
+        encode_dim = config["encode_dim"]
+        norm_layer = config["norm_layer"]
+        conv_block_dropout = config["conv_block_dropout"]
+        conv_block_num = config["conv_block_num"]
+        conv_block_dilation = config["conv_block_dilation"]
+        self.output_factor = config.get("output_factor", 1.0)
+        if norm_layer == "InstanceNorm2D":
+            use_bias = True
+        else:
+            use_bias = False
+        self.encoder_bg = Encoder(
+            name=name + "_encoder_bg",
+            in_channels=3,
+            encode_dim=encode_dim,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act="ReLU",
+            act_attr=None,
+            conv_block_dropout=conv_block_dropout,
+            conv_block_num=conv_block_num,
+            conv_block_dilation=conv_block_dilation)
+        self.decoder_bg = SingleDecoder(
+            name=name + "_decoder_bg",
+            encode_dim=encode_dim,
+            out_channels=3,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act="ReLU",
+            act_attr=None,
+            conv_block_dropout=conv_block_dropout,
+            conv_block_num=conv_block_num,
+            conv_block_dilation=conv_block_dilation,
+            out_conv_act="Tanh",
+            out_conv_act_attr=None)
+        self.decoder_mask = Decoder(
+            name=name + "_decoder_mask",
+            encode_dim=encode_dim // 2,
+            out_channels=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act="ReLU",
+            act_attr=None,
+            conv_block_dropout=conv_block_dropout,
+            conv_block_num=conv_block_num,
+            conv_block_dilation=conv_block_dilation,
+            out_conv_act="Sigmoid",
+            out_conv_act_attr=None)
+        self.middle = MiddleNet(
+            name=name + "_middle_net",
+            in_channels=3 + 1,
+            mid_channels=encode_dim,
+            out_channels=3,
+            use_bias=use_bias)
+    def forward(self, style_input):
+        encode_bg_output = self.encoder_bg(style_input)
+        decode_bg_output = self.decoder_bg(encode_bg_output["res_blocks"],
+                                           encode_bg_output["down2"],
+                                           encode_bg_output["down1"])
+        fake_c_temp = decode_bg_output["out_conv"]
+        fake_bg_mask = self.decoder_mask.forward(encode_bg_output[
+            "res_blocks"])["out_conv"]
+        fake_bg = self.middle(
+            paddle.concat(
+                (fake_c_temp, fake_bg_mask), axis=1))
+        return {
+            "bg_encode_feature": encode_bg_output["res_blocks"],
+            "bg_decode_feature1": decode_bg_output["up1"],
+            "bg_decode_feature2": decode_bg_output["up2"],
+            "fake_bg": fake_bg,
+            "fake_bg_mask": fake_bg_mask,
+        }
+class FusionGeneratorSimple(nn.Layer):
+    def __init__(self, config):
+        super(FusionGeneratorSimple, self).__init__()
+        name = config["module_name"]
+        encode_dim = config["encode_dim"]
+        norm_layer = config["norm_layer"]
+        conv_block_dropout = config["conv_block_dropout"]
+        conv_block_dilation = config["conv_block_dilation"]
+        if norm_layer == "InstanceNorm2D":
+            use_bias = True
+        else:
+            use_bias = False
+        self._conv = nn.Conv2D(
+            in_channels=6,
+            out_channels=encode_dim,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            groups=1,
+            weight_attr=paddle.ParamAttr(name=name + "_conv_weights"),
+            bias_attr=False)
+        self._res_block = ResBlock(
+            name="{}_conv_block".format(name),
+            channels=encode_dim,
+            norm_layer=norm_layer,
+            use_dropout=conv_block_dropout,
+            use_dilation=conv_block_dilation,
+            use_bias=use_bias)
+        self._reduce_conv = nn.Conv2D(
+            in_channels=encode_dim,
+            out_channels=3,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            groups=1,
+            weight_attr=paddle.ParamAttr(name=name + "_reduce_conv_weights"),
+            bias_attr=False)
+    def forward(self, fake_text, fake_bg):
+        fake_concat = paddle.concat((fake_text, fake_bg), axis=1)
+        fake_concat_tmp = self._conv(fake_concat)
+        output_res = self._res_block(fake_concat_tmp)
+        fake_fusion = self._reduce_conv(output_res)
+        return {"fake_fusion": fake_fusion}
--- a/StyleText/configs/config.yml
+++ b/StyleText/configs/config.yml
+Global:
+  output_num: 10
+  output_dir: output_data
+  use_gpu: false
+  image_height: 32
+  image_width: 320
+TextDrawer:
+  fonts:
+    en: fonts/en_standard.ttf
+    ch: fonts/ch_standard.ttf
+    ko: fonts/ko_standard.ttf
+Predictor:
+  method: StyleTextRecPredictor
+  algorithm: StyleTextRec
+  scale: 0.00392156862745098
+  mean:
+  - 0.5
+  - 0.5
+  - 0.5
+  std:
+  - 0.5
+  - 0.5
+  - 0.5
+  expand_result: false
+  bg_generator:
+    pretrain: style_text_models/bg_generator
+    module_name: bg_generator
+    generator_type: BgGeneratorWithMask
+    encode_dim: 64
+    norm_layer: null
+    conv_block_num: 4
+    conv_block_dropout: false
+    conv_block_dilation: true
+    output_factor: 1.05
+  text_generator:
+    pretrain: style_text_models/text_generator
+    module_name: text_generator
+    generator_type: TextGenerator
+    encode_dim: 64
+    norm_layer: InstanceNorm2D
+    conv_block_num: 4
+    conv_block_dropout: false
+    conv_block_dilation: true
+  fusion_generator:
+    pretrain: style_text_models/fusion_generator
+    module_name: fusion_generator
+    generator_type: FusionGeneratorSimple
+    encode_dim: 64
+    norm_layer: null
+    conv_block_num: 4
+    conv_block_dropout: false
+    conv_block_dilation: true
+Writer:
+  method: SimpleWriter
--- a/StyleText/configs/dataset_config.yml
+++ b/StyleText/configs/dataset_config.yml
+Global:
+  output_num: 10
+  output_dir: output_data
+  use_gpu: false
+  image_height: 32
+  image_width: 320
+  standard_font: fonts/en_standard.ttf
+TextDrawer:
+  fonts:
+    en: fonts/en_standard.ttf
+    ch: fonts/ch_standard.ttf
+    ko: fonts/ko_standard.ttf
+StyleSampler:
+  method: DatasetSampler
+  image_home: examples
+  label_file: examples/image_list.txt
+  with_label: true
+CorpusGenerator:
+  method: FileCorpus
+  language: ch
+  corpus_file: examples/corpus/example.txt
+Predictor:
+  method: StyleTextRecPredictor
+  algorithm: StyleTextRec
+  scale: 0.00392156862745098
+  mean:
+  - 0.5
+  - 0.5
+  - 0.5
+  std:
+  - 0.5
+  - 0.5
+  - 0.5
+  expand_result: false
+  bg_generator:
+    pretrain: style_text_models/bg_generator
+    module_name: bg_generator
+    generator_type: BgGeneratorWithMask
+    encode_dim: 64
+    norm_layer: null
+    conv_block_num: 4
+    conv_block_dropout: false
+    conv_block_dilation: true
+    output_factor: 1.05
+  text_generator:
+    pretrain: style_text_models/text_generator
+    module_name: text_generator
+    generator_type: TextGenerator
+    encode_dim: 64
+    norm_layer: InstanceNorm2D
+    conv_block_num: 4
+    conv_block_dropout: false
+    conv_block_dilation: true
+  fusion_generator:
+    pretrain: style_text_models/fusion_generator
+    module_name: fusion_generator
+    generator_type: FusionGeneratorSimple
+    encode_dim: 64
+    norm_layer: null
+    conv_block_num: 4
+    conv_block_dropout: false
+    conv_block_dilation: true
+Writer:
+  method: SimpleWriter
--- a/StyleText/doc/images/1.png
+++ b/StyleText/doc/images/1.png
--- a/StyleText/doc/images/10.png
+++ b/StyleText/doc/images/10.png
--- a/StyleText/doc/images/11.png
+++ b/StyleText/doc/images/11.png
--- a/StyleText/doc/images/12.png
+++ b/StyleText/doc/images/12.png
--- a/StyleText/doc/images/2.png
+++ b/StyleText/doc/images/2.png
--- a/StyleText/doc/images/3.png
+++ b/StyleText/doc/images/3.png
--- a/StyleText/doc/images/4.jpg
+++ b/StyleText/doc/images/4.jpg
--- a/StyleText/doc/images/5.png
+++ b/StyleText/doc/images/5.png
--- a/StyleText/doc/images/6.png
+++ b/StyleText/doc/images/6.png
--- a/StyleText/doc/images/7.jpg
+++ b/StyleText/doc/images/7.jpg
--- a/StyleText/doc/images/8.jpg
+++ b/StyleText/doc/images/8.jpg
--- a/StyleText/doc/images/9.png
+++ b/StyleText/doc/images/9.png