add style_text_rec

f2d98c5e · weishengyu · b1623d69 · f2d98c5e · f2d98c5e · f2d98c5e
Commit f2d98c5e authored Dec 15, 2020 by weishengyu
20 changed files
--- a/tools/style_text_rec/arch/base_module.py
+++ b/tools/style_text_rec/arch/base_module.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import functools
+import paddle
+import paddle.nn as nn
+from arch.spectral_norm import spectral_norm
+class CBN(nn.Layer):
+    def __init__(self,
+                 name,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 use_bias=False,
+                 norm_layer=None,
+                 act=None,
+                 act_attr=None):
+        super(CBN, self).__init__()
+        if use_bias:
+            bias_attr = paddle.ParamAttr(name=name + "_bias")
+        else:
+            bias_attr = None
+        self._conv = paddle.nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+            weight_attr=paddle.ParamAttr(name=name + "_weights"),
+            bias_attr=bias_attr)
+        if norm_layer:
+            self._norm_layer = getattr(paddle.nn, norm_layer)(
+                num_features=out_channels, name=name + "_bn")
+        else:
+            self._norm_layer = None
+        if act:
+            if act_attr:
+                self._act = getattr(paddle.nn, act)(**act_attr,
+                                                    name=name + "_" + act)
+            else:
+                self._act = getattr(paddle.nn, act)(name=name + "_" + act)
+        else:
+            self._act = None
+    def forward(self, x):
+        out = self._conv(x)
+        if self._norm_layer:
+            out = self._norm_layer(out)
+        if self._act:
+            out = self._act(out)
+        return out
+class SNConv(nn.Layer):
+    def __init__(self,
+                 name,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 use_bias=False,
+                 norm_layer=None,
+                 act=None,
+                 act_attr=None):
+        super(SNConv, self).__init__()
+        if use_bias:
+            bias_attr = paddle.ParamAttr(name=name + "_bias")
+        else:
+            bias_attr = None
+        self._sn_conv = spectral_norm(
+            paddle.nn.Conv2D(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=kernel_size,
+                stride=stride,
+                padding=padding,
+                dilation=dilation,
+                groups=groups,
+                weight_attr=paddle.ParamAttr(name=name + "_weights"),
+                bias_attr=bias_attr))
+        if norm_layer:
+            self._norm_layer = getattr(paddle.nn, norm_layer)(
+                num_features=out_channels, name=name + "_bn")
+        else:
+            self._norm_layer = None
+        if act:
+            if act_attr:
+                self._act = getattr(paddle.nn, act)(**act_attr,
+                                                    name=name + "_" + act)
+            else:
+                self._act = getattr(paddle.nn, act)(name=name + "_" + act)
+        else:
+            self._act = None
+    def forward(self, x):
+        out = self._sn_conv(x)
+        if self._norm_layer:
+            out = self._norm_layer(out)
+        if self._act:
+            out = self._act(out)
+        return out
+class SNConvTranspose(nn.Layer):
+    def __init__(self,
+                 name,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 output_padding=0,
+                 dilation=1,
+                 groups=1,
+                 use_bias=False,
+                 norm_layer=None,
+                 act=None,
+                 act_attr=None):
+        super(SNConvTranspose, self).__init__()
+        if use_bias:
+            bias_attr = paddle.ParamAttr(name=name + "_bias")
+        else:
+            bias_attr = None
+        self._sn_conv_transpose = spectral_norm(
+            paddle.nn.Conv2DTranspose(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=kernel_size,
+                stride=stride,
+                padding=padding,
+                output_padding=output_padding,
+                dilation=dilation,
+                groups=groups,
+                weight_attr=paddle.ParamAttr(name=name + "_weights"),
+                bias_attr=bias_attr))
+        if norm_layer:
+            self._norm_layer = getattr(paddle.nn, norm_layer)(
+                num_features=out_channels, name=name + "_bn")
+        else:
+            self._norm_layer = None
+        if act:
+            if act_attr:
+                self._act = getattr(paddle.nn, act)(**act_attr,
+                                                    name=name + "_" + act)
+            else:
+                self._act = getattr(paddle.nn, act)(name=name + "_" + act)
+        else:
+            self._act = None
+    def forward(self, x):
+        out = self._sn_conv_transpose(x)
+        if self._norm_layer:
+            out = self._norm_layer(out)
+        if self._act:
+            out = self._act(out)
+        return out
+class MiddleNet(nn.Layer):
+    def __init__(self, name, in_channels, mid_channels, out_channels,
+                 use_bias):
+        super(MiddleNet, self).__init__()
+        self._sn_conv1 = SNConv(
+            name=name + "_sn_conv1",
+            in_channels=in_channels,
+            out_channels=mid_channels,
+            kernel_size=1,
+            use_bias=use_bias,
+            norm_layer=None,
+            act=None)
+        self._pad2d = nn.Pad2D(padding=[1, 1, 1, 1], mode="replicate")
+        self._sn_conv2 = SNConv(
+            name=name + "_sn_conv2",
+            in_channels=mid_channels,
+            out_channels=mid_channels,
+            kernel_size=3,
+            use_bias=use_bias)
+        self._sn_conv3 = SNConv(
+            name=name + "_sn_conv3",
+            in_channels=mid_channels,
+            out_channels=out_channels,
+            kernel_size=1,
+            use_bias=use_bias)
+    def forward(self, x):
+        sn_conv1 = self._sn_conv1.forward(x)
+        pad_2d = self._pad2d.forward(sn_conv1)
+        sn_conv2 = self._sn_conv2.forward(pad_2d)
+        sn_conv3 = self._sn_conv3.forward(sn_conv2)
+        return sn_conv3
+class ResBlock(nn.Layer):
+    def __init__(self, name, channels, norm_layer, use_dropout, use_dilation,
+                 use_bias):
+        super(ResBlock, self).__init__()
+        if use_dilation:
+            padding_mat = [1, 1, 1, 1]
+        else:
+            padding_mat = [0, 0, 0, 0]
+        self._pad1 = nn.Pad2D(padding_mat, mode="replicate")
+        self._sn_conv1 = SNConv(
+            name=name + "_sn_conv1",
+            in_channels=channels,
+            out_channels=channels,
+            kernel_size=3,
+            padding=0,
+            norm_layer=norm_layer,
+            use_bias=use_bias,
+            act="ReLU",
+            act_attr=None)
+        if use_dropout:
+            self._dropout = nn.Dropout(0.5)
+        else:
+            self._dropout = None
+        self._pad2 = nn.Pad2D([1, 1, 1, 1], mode="replicate")
+        self._sn_conv2 = SNConv(
+            name=name + "_sn_conv2",
+            in_channels=channels,
+            out_channels=channels,
+            kernel_size=3,
+            norm_layer=norm_layer,
+            use_bias=use_bias,
+            act="ReLU",
+            act_attr=None)
+    def forward(self, x):
+        pad1 = self._pad1.forward(x)
+        sn_conv1 = self._sn_conv1.forward(pad1)
+        pad2 = self._pad2.forward(sn_conv1)
+        sn_conv2 = self._sn_conv2.forward(pad2)
+        return sn_conv2 + x
--- a/tools/style_text_rec/arch/decoder.py
+++ b/tools/style_text_rec/arch/decoder.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddle
+import paddle.nn as nn
+from arch.base_module import SNConv, SNConvTranspose, ResBlock
+class Decoder(nn.Layer):
+    def __init__(self, name, encode_dim, out_channels, use_bias, norm_layer,
+                 act, act_attr, conv_block_dropout, conv_block_num,
+                 conv_block_dilation, out_conv_act, out_conv_act_attr):
+        super(Decoder, self).__init__()
+        conv_blocks = []
+        for i in range(conv_block_num):
+            conv_blocks.append(
+                ResBlock(
+                    name="{}_conv_block_{}".format(name, i),
+                    channels=encode_dim * 8,
+                    norm_layer=norm_layer,
+                    use_dropout=conv_block_dropout,
+                    use_dilation=conv_block_dilation,
+                    use_bias=use_bias))
+        self.conv_blocks = nn.Sequential(*conv_blocks)
+        self._up1 = SNConvTranspose(
+            name=name + "_up1",
+            in_channels=encode_dim * 8,
+            out_channels=encode_dim * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._up2 = SNConvTranspose(
+            name=name + "_up2",
+            in_channels=encode_dim * 4,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._up3 = SNConvTranspose(
+            name=name + "_up3",
+            in_channels=encode_dim * 2,
+            out_channels=encode_dim,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._pad2d = paddle.nn.Pad2D([1, 1, 1, 1], mode="replicate")
+        self._out_conv = SNConv(
+            name=name + "_out_conv",
+            in_channels=encode_dim,
+            out_channels=out_channels,
+            kernel_size=3,
+            use_bias=use_bias,
+            norm_layer=None,
+            act=out_conv_act,
+            act_attr=out_conv_act_attr)
+    def forward(self, x):
+        if isinstance(x, (list, tuple)):
+            x = paddle.concat(x, axis=1)
+        output_dict = dict()
+        output_dict["conv_blocks"] = self.conv_blocks.forward(x)
+        output_dict["up1"] = self._up1.forward(output_dict["conv_blocks"])
+        output_dict["up2"] = self._up2.forward(output_dict["up1"])
+        output_dict["up3"] = self._up3.forward(output_dict["up2"])
+        output_dict["pad2d"] = self._pad2d.forward(output_dict["up3"])
+        output_dict["out_conv"] = self._out_conv.forward(output_dict["pad2d"])
+        return output_dict
+class DecoderUnet(nn.Layer):
+    def __init__(self, name, encode_dim, out_channels, use_bias, norm_layer,
+                 act, act_attr, conv_block_dropout, conv_block_num,
+                 conv_block_dilation, out_conv_act, out_conv_act_attr):
+        super(DecoderUnet, self).__init__()
+        conv_blocks = []
+        for i in range(conv_block_num):
+            conv_blocks.append(
+                ResBlock(
+                    name="{}_conv_block_{}".format(name, i),
+                    channels=encode_dim * 8,
+                    norm_layer=norm_layer,
+                    use_dropout=conv_block_dropout,
+                    use_dilation=conv_block_dilation,
+                    use_bias=use_bias))
+        self._conv_blocks = nn.Sequential(*conv_blocks)
+        self._up1 = SNConvTranspose(
+            name=name + "_up1",
+            in_channels=encode_dim * 8,
+            out_channels=encode_dim * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._up2 = SNConvTranspose(
+            name=name + "_up2",
+            in_channels=encode_dim * 8,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._up3 = SNConvTranspose(
+            name=name + "_up3",
+            in_channels=encode_dim * 4,
+            out_channels=encode_dim,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._pad2d = paddle.nn.Pad2D([1, 1, 1, 1], mode="replicate")
+        self._out_conv = SNConv(
+            name=name + "_out_conv",
+            in_channels=encode_dim,
+            out_channels=out_channels,
+            kernel_size=3,
+            use_bias=use_bias,
+            norm_layer=None,
+            act=out_conv_act,
+            act_attr=out_conv_act_attr)
+    def forward(self, x, y, feature2, feature1):
+        output_dict = dict()
+        output_dict["conv_blocks"] = self._conv_blocks(
+            paddle.concat(
+                (x, y), axis=1))
+        output_dict["up1"] = self._up1.forward(output_dict["conv_blocks"])
+        output_dict["up2"] = self._up2.forward(
+            paddle.concat(
+                (output_dict["up1"], feature2), axis=1))
+        output_dict["up3"] = self._up3.forward(
+            paddle.concat(
+                (output_dict["up2"], feature1), axis=1))
+        output_dict["pad2d"] = self._pad2d.forward(output_dict["up3"])
+        output_dict["out_conv"] = self._out_conv.forward(output_dict["pad2d"])
+        return output_dict
+class SingleDecoder(nn.Layer):
+    def __init__(self, name, encode_dim, out_channels, use_bias, norm_layer,
+                 act, act_attr, conv_block_dropout, conv_block_num,
+                 conv_block_dilation, out_conv_act, out_conv_act_attr):
+        super(SingleDecoder, self).__init__()
+        conv_blocks = []
+        for i in range(conv_block_num):
+            conv_blocks.append(
+                ResBlock(
+                    name="{}_conv_block_{}".format(name, i),
+                    channels=encode_dim * 4,
+                    norm_layer=norm_layer,
+                    use_dropout=conv_block_dropout,
+                    use_dilation=conv_block_dilation,
+                    use_bias=use_bias))
+        self._conv_blocks = nn.Sequential(*conv_blocks)
+        self._up1 = SNConvTranspose(
+            name=name + "_up1",
+            in_channels=encode_dim * 4,
+            out_channels=encode_dim * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._up2 = SNConvTranspose(
+            name=name + "_up2",
+            in_channels=encode_dim * 8,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._up3 = SNConvTranspose(
+            name=name + "_up3",
+            in_channels=encode_dim * 4,
+            out_channels=encode_dim,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            output_padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._pad2d = paddle.nn.Pad2D([1, 1, 1, 1], mode="replicate")
+        self._out_conv = SNConv(
+            name=name + "_out_conv",
+            in_channels=encode_dim,
+            out_channels=out_channels,
+            kernel_size=3,
+            use_bias=use_bias,
+            norm_layer=None,
+            act=out_conv_act,
+            act_attr=out_conv_act_attr)
+    def forward(self, x, feature2, feature1):
+        output_dict = dict()
+        output_dict["conv_blocks"] = self._conv_blocks.forward(x)
+        output_dict["up1"] = self._up1.forward(output_dict["conv_blocks"])
+        output_dict["up2"] = self._up2.forward(
+            paddle.concat(
+                (output_dict["up1"], feature2), axis=1))
+        output_dict["up3"] = self._up3.forward(
+            paddle.concat(
+                (output_dict["up2"], feature1), axis=1))
+        output_dict["pad2d"] = self._pad2d.forward(output_dict["up3"])
+        output_dict["out_conv"] = self._out_conv.forward(output_dict["pad2d"])
+        return output_dict
--- a/tools/style_text_rec/arch/encoder.py
+++ b/tools/style_text_rec/arch/encoder.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddle
+import paddle.nn as nn
+from arch.base_module import SNConv, SNConvTranspose, ResBlock
+class Encoder(nn.Layer):
+    def __init__(self, name, in_channels, encode_dim, use_bias, norm_layer,
+                 act, act_attr, conv_block_dropout, conv_block_num,
+                 conv_block_dilation):
+        super(Encoder, self).__init__()
+        self._pad2d = paddle.nn.Pad2D([3, 3, 3, 3], mode="replicate")
+        self._in_conv = SNConv(
+            name=name + "_in_conv",
+            in_channels=in_channels,
+            out_channels=encode_dim,
+            kernel_size=7,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._down1 = SNConv(
+            name=name + "_down1",
+            in_channels=encode_dim,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._down2 = SNConv(
+            name=name + "_down2",
+            in_channels=encode_dim * 2,
+            out_channels=encode_dim * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._down3 = SNConv(
+            name=name + "_down3",
+            in_channels=encode_dim * 4,
+            out_channels=encode_dim * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        conv_blocks = []
+        for i in range(conv_block_num):
+            conv_blocks.append(
+                ResBlock(
+                    name="{}_conv_block_{}".format(name, i),
+                    channels=encode_dim * 4,
+                    norm_layer=norm_layer,
+                    use_dropout=conv_block_dropout,
+                    use_dilation=conv_block_dilation,
+                    use_bias=use_bias))
+        self._conv_blocks = nn.Sequential(*conv_blocks)
+    def forward(self, x):
+        out_dict = dict()
+        x = self._pad2d(x)
+        out_dict["in_conv"] = self._in_conv.forward(x)
+        out_dict["down1"] = self._down1.forward(out_dict["in_conv"])
+        out_dict["down2"] = self._down2.forward(out_dict["down1"])
+        out_dict["down3"] = self._down3.forward(out_dict["down2"])
+        out_dict["res_blocks"] = self._conv_blocks.forward(out_dict["down3"])
+        return out_dict
+class EncoderUnet(nn.Layer):
+    def __init__(self, name, in_channels, encode_dim, use_bias, norm_layer,
+                 act, act_attr):
+        super(EncoderUnet, self).__init__()
+        self._pad2d = paddle.nn.Pad2D([3, 3, 3, 3], mode="replicate")
+        self._in_conv = SNConv(
+            name=name + "_in_conv",
+            in_channels=in_channels,
+            out_channels=encode_dim,
+            kernel_size=7,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._down1 = SNConv(
+            name=name + "_down1",
+            in_channels=encode_dim,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._down2 = SNConv(
+            name=name + "_down2",
+            in_channels=encode_dim * 2,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._down3 = SNConv(
+            name=name + "_down3",
+            in_channels=encode_dim * 2,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._down4 = SNConv(
+            name=name + "_down4",
+            in_channels=encode_dim * 2,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._up1 = SNConvTranspose(
+            name=name + "_up1",
+            in_channels=encode_dim * 2,
+            out_channels=encode_dim * 2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+        self._up2 = SNConvTranspose(
+            name=name + "_up2",
+            in_channels=encode_dim * 4,
+            out_channels=encode_dim * 4,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act=act,
+            act_attr=act_attr)
+    def forward(self, x):
+        output_dict = dict()
+        x = self._pad2d(x)
+        output_dict['in_conv'] = self._in_conv.forward(x)
+        output_dict['down1'] = self._down1.forward(output_dict['in_conv'])
+        output_dict['down2'] = self._down2.forward(output_dict['down1'])
+        output_dict['down3'] = self._down3.forward(output_dict['down2'])
+        output_dict['down4'] = self._down4.forward(output_dict['down3'])
+        output_dict['up1'] = self._up1.forward(output_dict['down4'])
+        output_dict['up2'] = self._up2.forward(
+            paddle.concat(
+                (output_dict['down3'], output_dict['up1']), axis=1))
+        output_dict['concat'] = paddle.concat(
+            (output_dict['down2'], output_dict['up2']), axis=1)
+        return output_dict
--- a/tools/style_text_rec/arch/spectral_norm.py
+++ b/tools/style_text_rec/arch/spectral_norm.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import numpy as np
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+def normal_(x, mean=0., std=1.):
+    temp_value = paddle.normal(mean, std, shape=x.shape)
+    x.set_value(temp_value)
+    return x
+class SpectralNorm(object):
+    def __init__(self, name='weight', n_power_iterations=1, dim=0, eps=1e-12):
+        self.name = name
+        self.dim = dim
+        if n_power_iterations <= 0:
+            raise ValueError('Expected n_power_iterations to be positive, but '
+                             'got n_power_iterations={}'.format(
+                                 n_power_iterations))
+        self.n_power_iterations = n_power_iterations
+        self.eps = eps
+    def reshape_weight_to_matrix(self, weight):
+        weight_mat = weight
+        if self.dim != 0:
+            # transpose dim to front
+            weight_mat = weight_mat.transpose([
+                self.dim,
+                * [d for d in range(weight_mat.dim()) if d != self.dim]
+            ])
+        height = weight_mat.shape[0]
+        return weight_mat.reshape([height, -1])
+    def compute_weight(self, module, do_power_iteration):
+        weight = getattr(module, self.name + '_orig')
+        u = getattr(module, self.name + '_u')
+        v = getattr(module, self.name + '_v')
+        weight_mat = self.reshape_weight_to_matrix(weight)
+        if do_power_iteration:
+            with paddle.no_grad():
+                for _ in range(self.n_power_iterations):
+                    v.set_value(
+                        F.normalize(
+                            paddle.matmul(
+                                weight_mat,
+                                u,
+                                transpose_x=True,
+                                transpose_y=False),
+                            axis=0,
+                            epsilon=self.eps, ))
+                    u.set_value(
+                        F.normalize(
+                            paddle.matmul(weight_mat, v),
+                            axis=0,
+                            epsilon=self.eps, ))
+                if self.n_power_iterations > 0:
+                    u = u.clone()
+                    v = v.clone()
+        sigma = paddle.dot(u, paddle.mv(weight_mat, v))
+        weight = weight / sigma
+        return weight
+    def remove(self, module):
+        with paddle.no_grad():
+            weight = self.compute_weight(module, do_power_iteration=False)
+        delattr(module, self.name)
+        delattr(module, self.name + '_u')
+        delattr(module, self.name + '_v')
+        delattr(module, self.name + '_orig')
+        module.add_parameter(self.name, weight.detach())
+    def __call__(self, module, inputs):
+        setattr(
+            module,
+            self.name,
+            self.compute_weight(
+                module, do_power_iteration=module.training))
+    @staticmethod
+    def apply(module, name, n_power_iterations, dim, eps):
+        for k, hook in module._forward_pre_hooks.items():
+            if isinstance(hook, SpectralNorm) and hook.name == name:
+                raise RuntimeError(
+                    "Cannot register two spectral_norm hooks on "
+                    "the same parameter {}".format(name))
+        fn = SpectralNorm(name, n_power_iterations, dim, eps)
+        weight = module._parameters[name]
+        with paddle.no_grad():
+            weight_mat = fn.reshape_weight_to_matrix(weight)
+            h, w = weight_mat.shape
+            # randomly initialize u and v
+            u = module.create_parameter([h])
+            u = normal_(u, 0., 1.)
+            v = module.create_parameter([w])
+            v = normal_(v, 0., 1.)
+            u = F.normalize(u, axis=0, epsilon=fn.eps)
+            v = F.normalize(v, axis=0, epsilon=fn.eps)
+        # delete fn.name form parameters, otherwise you can not set attribute
+        del module._parameters[fn.name]
+        module.add_parameter(fn.name + "_orig", weight)
+        # still need to assign weight back as fn.name because all sorts of
+        # things may assume that it exists, e.g., when initializing weights.
+        # However, we can't directly assign as it could be an Parameter and
+        # gets added as a parameter. Instead, we register weight * 1.0 as a plain
+        # attribute.
+        setattr(module, fn.name, weight * 1.0)
+        module.register_buffer(fn.name + "_u", u)
+        module.register_buffer(fn.name + "_v", v)
+        module.register_forward_pre_hook(fn)
+        return fn
+def spectral_norm(module,
+                  name='weight',
+                  n_power_iterations=1,
+                  eps=1e-12,
+                  dim=None):
+    if dim is None:
+        if isinstance(module, (nn.Conv1DTranspose, nn.Conv2DTranspose,
+                               nn.Conv3DTranspose, nn.Linear)):
+            dim = 1
+        else:
+            dim = 0
+    SpectralNorm.apply(module, name, n_power_iterations, dim, eps)
+    return module
--- a/tools/style_text_rec/arch/style_text_rec.py
+++ b/tools/style_text_rec/arch/style_text_rec.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import cv2
+import math
+import paddle
+import paddle.nn as nn
+from arch.base_module import MiddleNet, ResBlock
+from arch.encoder import Encoder
+from arch.decoder import Decoder, DecoderUnet, SingleDecoder
+from utils.load_params import load_dygraph_pretrain
+from utils.logging import get_logger
+class StyleTextRec(nn.Layer):
+    def __init__(self, config):
+        super(StyleTextRec, self).__init__()
+        self.logger = get_logger()
+        self.text_generator = TextGenerator(config["Predictor"][
+            "text_generator"])
+        self.bg_generator = BgGeneratorWithMask(config["Predictor"][
+            "bg_generator"])
+        self.fusion_generator = FusionGeneratorSimple(config["Predictor"][
+            "fusion_generator"])
+        bg_generator_pretrain = config["Predictor"]["bg_generator"]["pretrain"]
+        text_generator_pretrain = config["Predictor"]["text_generator"][
+            "pretrain"]
+        fusion_generator_pretrain = config["Predictor"]["fusion_generator"][
+            "pretrain"]
+        load_dygraph_pretrain(
+            self.bg_generator,
+            self.logger,
+            path=bg_generator_pretrain,
+            load_static_weights=False)
+        load_dygraph_pretrain(
+            self.text_generator,
+            self.logger,
+            path=text_generator_pretrain,
+            load_static_weights=False)
+        load_dygraph_pretrain(
+            self.fusion_generator,
+            self.logger,
+            path=fusion_generator_pretrain,
+            load_static_weights=False)
+    def forward(self, style_input, text_input):
+        text_gen_output = self.text_generator.forward(style_input, text_input)
+        fake_text = text_gen_output["fake_text"]
+        fake_sk = text_gen_output["fake_sk"]
+        bg_gen_output = self.bg_generator.forward(style_input)
+        bg_encode_feature = bg_gen_output["bg_encode_feature"]
+        bg_decode_feature1 = bg_gen_output["bg_decode_feature1"]
+        bg_decode_feature2 = bg_gen_output["bg_decode_feature2"]
+        fake_bg = bg_gen_output["fake_bg"]
+        fusion_gen_output = self.fusion_generator.forward(fake_text, fake_bg)
+        fake_fusion = fusion_gen_output["fake_fusion"]
+        return {
+            "fake_fusion": fake_fusion,
+            "fake_text": fake_text,
+            "fake_sk": fake_sk,
+            "fake_bg": fake_bg,
+        }
+class TextGenerator(nn.Layer):
+    def __init__(self, config):
+        super(TextGenerator, self).__init__()
+        name = config["module_name"]
+        encode_dim = config["encode_dim"]
+        norm_layer = config["norm_layer"]
+        conv_block_dropout = config["conv_block_dropout"]
+        conv_block_num = config["conv_block_num"]
+        conv_block_dilation = config["conv_block_dilation"]
+        if norm_layer == "InstanceNorm2D":
+            use_bias = True
+        else:
+            use_bias = False
+        self.encoder_text = Encoder(
+            name=name + "_encoder_text",
+            in_channels=3,
+            encode_dim=encode_dim,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act="ReLU",
+            act_attr=None,
+            conv_block_dropout=conv_block_dropout,
+            conv_block_num=conv_block_num,
+            conv_block_dilation=conv_block_dilation)
+        self.encoder_style = Encoder(
+            name=name + "_encoder_style",
+            in_channels=3,
+            encode_dim=encode_dim,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act="ReLU",
+            act_attr=None,
+            conv_block_dropout=conv_block_dropout,
+            conv_block_num=conv_block_num,
+            conv_block_dilation=conv_block_dilation)
+        self.decoder_text = Decoder(
+            name=name + "_decoder_text",
+            encode_dim=encode_dim,
+            out_channels=int(encode_dim / 2),
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act="ReLU",
+            act_attr=None,
+            conv_block_dropout=conv_block_dropout,
+            conv_block_num=conv_block_num,
+            conv_block_dilation=conv_block_dilation,
+            out_conv_act="Tanh",
+            out_conv_act_attr=None)
+        self.decoder_sk = Decoder(
+            name=name + "_decoder_sk",
+            encode_dim=encode_dim,
+            out_channels=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act="ReLU",
+            act_attr=None,
+            conv_block_dropout=conv_block_dropout,
+            conv_block_num=conv_block_num,
+            conv_block_dilation=conv_block_dilation,
+            out_conv_act="Sigmoid",
+            out_conv_act_attr=None)
+        self.middle = MiddleNet(
+            name=name + "_middle_net",
+            in_channels=int(encode_dim / 2) + 1,
+            mid_channels=encode_dim,
+            out_channels=3,
+            use_bias=use_bias)
+    def forward(self, style_input, text_input):
+        style_feature = self.encoder_style.forward(style_input)["res_blocks"]
+        text_feature = self.encoder_text.forward(text_input)["res_blocks"]
+        fake_c_temp = self.decoder_text.forward([text_feature,
+                                                 style_feature])["out_conv"]
+        fake_sk = self.decoder_sk.forward([text_feature,
+                                           style_feature])["out_conv"]
+        fake_text = self.middle(paddle.concat((fake_c_temp, fake_sk), axis=1))
+        return {"fake_sk": fake_sk, "fake_text": fake_text}
+class BgGeneratorWithMask(nn.Layer):
+    def __init__(self, config):
+        super(BgGeneratorWithMask, self).__init__()
+        name = config["module_name"]
+        encode_dim = config["encode_dim"]
+        norm_layer = config["norm_layer"]
+        conv_block_dropout = config["conv_block_dropout"]
+        conv_block_num = config["conv_block_num"]
+        conv_block_dilation = config["conv_block_dilation"]
+        self.output_factor = config.get("output_factor", 1.0)
+        if norm_layer == "InstanceNorm2D":
+            use_bias = True
+        else:
+            use_bias = False
+        self.encoder_bg = Encoder(
+            name=name + "_encoder_bg",
+            in_channels=3,
+            encode_dim=encode_dim,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act="ReLU",
+            act_attr=None,
+            conv_block_dropout=conv_block_dropout,
+            conv_block_num=conv_block_num,
+            conv_block_dilation=conv_block_dilation)
+        self.decoder_bg = SingleDecoder(
+            name=name + "_decoder_bg",
+            encode_dim=encode_dim,
+            out_channels=3,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act="ReLU",
+            act_attr=None,
+            conv_block_dropout=conv_block_dropout,
+            conv_block_num=conv_block_num,
+            conv_block_dilation=conv_block_dilation,
+            out_conv_act="Tanh",
+            out_conv_act_attr=None)
+        self.decoder_mask = Decoder(
+            name=name + "_decoder_mask",
+            encode_dim=encode_dim // 2,
+            out_channels=1,
+            use_bias=use_bias,
+            norm_layer=norm_layer,
+            act="ReLU",
+            act_attr=None,
+            conv_block_dropout=conv_block_dropout,
+            conv_block_num=conv_block_num,
+            conv_block_dilation=conv_block_dilation,
+            out_conv_act="Sigmoid",
+            out_conv_act_attr=None)
+        self.middle = MiddleNet(
+            name=name + "_middle_net",
+            in_channels=3 + 1,
+            mid_channels=encode_dim,
+            out_channels=3,
+            use_bias=use_bias)
+    def forward(self, style_input):
+        encode_bg_output = self.encoder_bg(style_input)
+        decode_bg_output = self.decoder_bg(encode_bg_output["res_blocks"],
+                                           encode_bg_output["down2"],
+                                           encode_bg_output["down1"])
+        fake_c_temp = decode_bg_output["out_conv"]
+        fake_bg_mask = self.decoder_mask.forward(encode_bg_output[
+            "res_blocks"])["out_conv"]
+        fake_bg = self.middle(
+            paddle.concat(
+                (fake_c_temp, fake_bg_mask), axis=1))
+        return {
+            "bg_encode_feature": encode_bg_output["res_blocks"],
+            "bg_decode_feature1": decode_bg_output["up1"],
+            "bg_decode_feature2": decode_bg_output["up2"],
+            "fake_bg": fake_bg,
+            "fake_bg_mask": fake_bg_mask,
+        }
+class FusionGeneratorSimple(nn.Layer):
+    def __init__(self, config):
+        super(FusionGeneratorSimple, self).__init__()
+        name = config["module_name"]
+        encode_dim = config["encode_dim"]
+        norm_layer = config["norm_layer"]
+        conv_block_dropout = config["conv_block_dropout"]
+        conv_block_dilation = config["conv_block_dilation"]
+        if norm_layer == "InstanceNorm2D":
+            use_bias = True
+        else:
+            use_bias = False
+        self._conv = nn.Conv2D(
+            in_channels=6,
+            out_channels=encode_dim,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            groups=1,
+            weight_attr=paddle.ParamAttr(name=name + "_conv_weights"),
+            bias_attr=False)
+        self._res_block = ResBlock(
+            name="{}_conv_block".format(name),
+            channels=encode_dim,
+            norm_layer=norm_layer,
+            use_dropout=conv_block_dropout,
+            use_dilation=conv_block_dilation,
+            use_bias=use_bias)
+        self._reduce_conv = nn.Conv2D(
+            in_channels=encode_dim,
+            out_channels=3,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            groups=1,
+            weight_attr=paddle.ParamAttr(name=name + "_reduce_conv_weights"),
+            bias_attr=False)
+    def forward(self, fake_text, fake_bg):
+        fake_concat = paddle.concat((fake_text, fake_bg), axis=1)
+        fake_concat_tmp = self._conv(fake_concat)
+        output_res = self._res_block(fake_concat_tmp)
+        fake_fusion = self._reduce_conv(output_res)
+        return {"fake_fusion": fake_fusion}
--- a/tools/style_text_rec/configs/config.yml
+++ b/tools/style_text_rec/configs/config.yml
+Global:
+  output_num: 10
+  output_dir: output_data
+  use_gpu: false
+  image_height: 32
+  image_width: 320
+TextDrawer:
+  fonts:
+    en: fonts/en_standard.ttf
+    ch: fonts/ch_standard.ttf
+    ko: fonts/ko_standard.ttf
+Predictor:
+  method: StyleTextRecPredictor
+  algorithm: StyleTextRec
+  scale: 0.00392156862745098
+  mean:
+  - 0.5
+  - 0.5
+  - 0.5
+  std:
+  - 0.5
+  - 0.5
+  - 0.5
+  expand_result: false
+  bg_generator:
+    pretrain: style_text_models/bg_generator
+    module_name: bg_generator
+    generator_type: BgGeneratorWithMask
+    encode_dim: 64
+    norm_layer: null
+    conv_block_num: 4
+    conv_block_dropout: false
+    conv_block_dilation: true
+    output_factor: 1.05
+  text_generator:
+    pretrain: style_text_models/text_generator
+    module_name: text_generator
+    generator_type: TextGenerator
+    encode_dim: 64
+    norm_layer: InstanceNorm2D
+    conv_block_num: 4
+    conv_block_dropout: false
+    conv_block_dilation: true
+  fusion_generator:
+    pretrain: style_text_models/fusion_generator
+    module_name: fusion_generator
+    generator_type: FusionGeneratorSimple
+    encode_dim: 64
+    norm_layer: null
+    conv_block_num: 4
+    conv_block_dropout: false
+    conv_block_dilation: true
+Writer:
+  method: SimpleWriter
--- a/tools/style_text_rec/configs/dataset_config.yml
+++ b/tools/style_text_rec/configs/dataset_config.yml
+Global:
+  output_num: 10
+  output_dir: output_data
+  use_gpu: false
+  image_height: 32
+  image_width: 320
+  standard_font: fonts/en_standard.ttf
+TextDrawer:
+  fonts:
+    en: fonts/en_standard.ttf
+    ch: fonts/ch_standard.ttf
+    ko: fonts/ko_standard.ttf
+StyleSampler:
+  method: DatasetSampler
+  image_home: examples
+  label_file: examples/image_list.txt
+  with_label: true
+CorpusGenerator:
+  method: FileCorpus
+  language: ch
+  corpus_file: examples/corpus/example.txt
+Predictor:
+  method: StyleTextRecPredictor
+  algorithm: StyleTextRec
+  scale: 0.00392156862745098
+  mean:
+  - 0.5
+  - 0.5
+  - 0.5
+  std:
+  - 0.5
+  - 0.5
+  - 0.5
+  expand_result: false
+  bg_generator:
+    pretrain: style_text_models/bg_generator
+    module_name: bg_generator
+    generator_type: BgGeneratorWithMask
+    encode_dim: 64
+    norm_layer: null
+    conv_block_num: 4
+    conv_block_dropout: false
+    conv_block_dilation: true
+    output_factor: 1.05
+  text_generator:
+    pretrain: style_text_models/text_generator
+    module_name: text_generator
+    generator_type: TextGenerator
+    encode_dim: 64
+    norm_layer: InstanceNorm2D
+    conv_block_num: 4
+    conv_block_dropout: false
+    conv_block_dilation: true
+  fusion_generator:
+    pretrain: style_text_models/fusion_generator
+    module_name: fusion_generator
+    generator_type: FusionGeneratorSimple
+    encode_dim: 64
+    norm_layer: null
+    conv_block_num: 4
+    conv_block_dropout: false
+    conv_block_dilation: true
+Writer:
+  method: SimpleWriter
--- a/tools/style_text_rec/engine/corpus_generators.py
+++ b/tools/style_text_rec/engine/corpus_generators.py
+import random
+from PIL import Image, ImageDraw, ImageFont
+import numpy as np
+from utils.logging import get_logger
+class FileCorpus(object):
+    def __init__(self, config):
+        self.logger = get_logger()
+        self.logger.info("using FileCorpus")
+        self.char_list = " 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+        corpus_file = config["CorpusGenerator"]["corpus_file"]
+        self.language = config["CorpusGenerator"]["language"]
+        with open(corpus_file, 'r') as f:
+            corpus_raw = f.read()
+        self.corpus_list = corpus_raw.split("\n")[:-1]
+        assert len(self.corpus_list) > 0
+        random.shuffle(self.corpus_list)
+        self.index = 0
+    def generate(self, corpus_length=0):
+        if self.index >= len(self.corpus_list):
+            self.index = 0
+            random.shuffle(self.corpus_list)
+        corpus = self.corpus_list[self.index]
+        if corpus_length != 0:
+            corpus = corpus[0:corpus_length]
+        if corpus_length > len(corpus):
+            self.logger.warning("generated corpus is shorter than expected.")
+        self.index += 1
+        return self.language, corpus
+class EnNumCorpus(object):
+    def __init__(self, config):
+        self.logger = get_logger()
+        self.logger.info("using NumberCorpus")
+        self.num_list = "0123456789"
+        self.en_char_list = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+        self.height = config["Global"]["image_height"]
+        self.max_width = config["Global"]["image_width"]
+    def generate(self, corpus_length=0):
+        corpus = ""
+        if corpus_length == 0:
+            corpus_length = random.randint(5, 15)
+        for i in range(corpus_length):
+            if random.random() < 0.2:
+                corpus += "{}".format(random.choice(self.en_char_list))
+            else:
+                corpus += "{}".format(random.choice(self.num_list))
+        return "en", corpus
--- a/tools/style_text_rec/engine/predictors.py
+++ b/tools/style_text_rec/engine/predictors.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import cv2
+import math
+import paddle
+from arch import style_text_rec
+from utils.sys_funcs import check_gpu
+from utils.logging import get_logger
+class StyleTextRecPredictor(object):
+    def __init__(self, config):
+        algorithm = config['Predictor']['algorithm']
+        assert algorithm in ["StyleTextRec"
+                             ], "Generator {} not supported.".format(algorithm)
+        use_gpu = config["Global"]['use_gpu']
+        check_gpu(use_gpu)
+        self.logger = get_logger()
+        self.generator = getattr(style_text_rec, algorithm)(config)
+        self.height = config["Global"]["image_height"]
+        self.width = config["Global"]["image_width"]
+        self.scale = config["Predictor"]["scale"]
+        self.mean = config["Predictor"]["mean"]
+        self.std = config["Predictor"]["std"]
+        self.expand_result = config["Predictor"]["expand_result"]
+    def predict(self, style_input, text_input):
+        style_input = self.rep_style_input(style_input, text_input)
+        tensor_style_input = self.preprocess(style_input)
+        tensor_text_input = self.preprocess(text_input)
+        style_text_result = self.generator.forward(tensor_style_input,
+                                                   tensor_text_input)
+        fake_fusion = self.postprocess(style_text_result["fake_fusion"])
+        fake_text = self.postprocess(style_text_result["fake_text"])
+        fake_sk = self.postprocess(style_text_result["fake_sk"])
+        fake_bg = self.postprocess(style_text_result["fake_bg"])
+        bbox = self.get_text_boundary(fake_text)
+        if bbox:
+            left, right, top, bottom = bbox
+            fake_fusion = fake_fusion[top:bottom, left:right, :]
+            fake_text = fake_text[top:bottom, left:right, :]
+            fake_sk = fake_sk[top:bottom, left:right, :]
+            fake_bg = fake_bg[top:bottom, left:right, :]
+        # fake_fusion = self.crop_by_text(img_fake_fusion, img_fake_text)
+        return {
+            "fake_fusion": fake_fusion,
+            "fake_text": fake_text,
+            "fake_sk": fake_sk,
+            "fake_bg": fake_bg,
+        }
+    def preprocess(self, img):
+        img = (img.astype('float32') * self.scale - self.mean) / self.std
+        img_height, img_width, channel = img.shape
+        assert channel == 3, "Please use an rgb image."
+        ratio = img_width / float(img_height)
+        if math.ceil(self.height * ratio) > self.width:
+            resized_w = self.width
+        else:
+            resized_w = int(math.ceil(self.height * ratio))
+        img = cv2.resize(img, (resized_w, self.height))
+        new_img = np.zeros([self.height, self.width, 3]).astype('float32')
+        new_img[:, 0:resized_w, :] = img
+        img = new_img.transpose((2, 0, 1))
+        img = img[np.newaxis, :, :, :]
+        return paddle.to_tensor(img)
+    def postprocess(self, tensor):
+        img = tensor.numpy()[0]
+        img = img.transpose((1, 2, 0))
+        img = (img * self.std + self.mean) / self.scale
+        img = np.maximum(img, 0.0)
+        img = np.minimum(img, 255.0)
+        img = img.astype('uint8')
+        return img
+    def rep_style_input(self, style_input, text_input):
+        rep_num = int(1.2 * (text_input.shape[1] / text_input.shape[0]) /
+                      (style_input.shape[1] / style_input.shape[0])) + 1
+        style_input = np.tile(style_input, reps=[1, rep_num, 1])
+        max_width = int(self.width / self.height * style_input.shape[0])
+        style_input = style_input[:, :max_width, :]
+        return style_input
+    def get_text_boundary(self, text_img):
+        img_height = text_img.shape[0]
+        img_width = text_img.shape[1]
+        bounder = 3
+        text_canny_img = cv2.Canny(text_img, 10, 20)
+        edge_num_h = text_canny_img.sum(axis=0)
+        no_zero_list_h = np.where(edge_num_h > 0)[0]
+        edge_num_w = text_canny_img.sum(axis=1)
+        no_zero_list_w = np.where(edge_num_w > 0)[0]
+        if len(no_zero_list_h) == 0 or len(no_zero_list_w) == 0:
+            return None
+        left = max(no_zero_list_h[0] - bounder, 0)
+        right = min(no_zero_list_h[-1] + bounder, img_width)
+        top = max(no_zero_list_w[0] - bounder, 0)
+        bottom = min(no_zero_list_w[-1] + bounder, img_height)
+        return [left, right, top, bottom]
--- a/tools/style_text_rec/engine/style_samplers.py
+++ b/tools/style_text_rec/engine/style_samplers.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import random
+import cv2
+class DatasetSampler(object):
+    def __init__(self, config):
+        self.image_home = config["StyleSampler"]["image_home"]
+        label_file = config["StyleSampler"]["label_file"]
+        self.dataset_with_label = config["StyleSampler"]["with_label"]
+        self.height = config["Global"]["image_height"]
+        self.index = 0
+        with open(label_file, "r") as f:
+            label_raw = f.read()
+            self.path_label_list = label_raw.split("\n")[:-1]
+        assert len(self.path_label_list) > 0
+        random.shuffle(self.path_label_list)
+    def sample(self):
+        if self.index >= len(self.path_label_list):
+            random.shuffle(self.path_label_list)
+            self.index = 0
+        if self.dataset_with_label:
+            path_label = self.path_label_list[self.index]
+            rel_image_path, label = path_label.split('\t')
+        else:
+            rel_image_path = self.path_label_list[self.index]
+            label = None
+        img_path = "{}/{}".format(self.image_home, rel_image_path)
+        image = cv2.imread(img_path)
+        origin_height = image.shape[0]
+        ratio = self.height / origin_height
+        width = int(image.shape[1] * ratio)
+        height = int(image.shape[0] * ratio)
+        image = cv2.resize(image, (width, height))
+        self.index += 1
+        if label:
+            return {"image": image, "label": label}
+        else:
+            return {"image": image}
+def duplicate_image(image, width):
+    image_width = image.shape[1]
+    dup_num = width // image_width + 1
+    image = np.tile(image, reps=[1, dup_num, 1])
+    cropped_image = image[:, :width, :]
+    return cropped_image
--- a/tools/style_text_rec/engine/synthesisers.py
+++ b/tools/style_text_rec/engine/synthesisers.py
+import os
+from utils.config import ArgsParser, load_config, override_config
+from utils.logging import get_logger
+from engine import style_samplers, corpus_generators, text_drawers, predictors, writers
+class ImageSynthesiser(object):
+    def __init__(self):
+        self.FLAGS = ArgsParser().parse_args()
+        self.config = load_config(self.FLAGS.config)
+        self.config = override_config(self.config, options=self.FLAGS.override)
+        self.output_dir = self.config["Global"]["output_dir"]
+        if not os.path.exists(self.output_dir):
+            os.mkdir(self.output_dir)
+        self.logger = get_logger(
+            log_file='{}/predict.log'.format(self.output_dir))
+        self.text_drawer = text_drawers.StdTextDrawer(self.config)
+        predictor_method = self.config["Predictor"]["method"]
+        assert predictor_method is not None
+        self.predictor = getattr(predictors, predictor_method)(self.config)
+    def synth_image(self, corpus, style_input, language="en"):
+        corpus, text_input = self.text_drawer.draw_text(corpus, language)
+        synth_result = self.predictor.predict(style_input, text_input)
+        return synth_result
+class DatasetSynthesiser(ImageSynthesiser):
+    def __init__(self):
+        super(DatasetSynthesiser, self).__init__()
+        self.tag = self.FLAGS.tag
+        self.output_num = self.config["Global"]["output_num"]
+        corpus_generator_method = self.config["CorpusGenerator"]["method"]
+        self.corpus_generator = getattr(corpus_generators,
+                                        corpus_generator_method)(self.config)
+        style_sampler_method = self.config["StyleSampler"]["method"]
+        assert style_sampler_method is not None
+        self.style_sampler = style_samplers.DatasetSampler(self.config)
+        self.writer = writers.SimpleWriter(self.config, self.tag)
+    def synth_dataset(self):
+        for i in range(self.output_num):
+            style_data = self.style_sampler.sample()
+            style_input = style_data["image"]
+            corpus_language, text_input_label = self.corpus_generator.generate(
+            )
+            text_input_label, text_input = self.text_drawer.draw_text(
+                text_input_label, corpus_language)
+            synth_result = self.predictor.predict(style_input, text_input)
+            fake_fusion = synth_result["fake_fusion"]
+            self.writer.save_image(fake_fusion, text_input_label)
+        self.writer.save_label()
+        self.writer.merge_label()
--- a/tools/style_text_rec/engine/text_drawers.py
+++ b/tools/style_text_rec/engine/text_drawers.py
+import random
+from PIL import Image, ImageDraw, ImageFont
+import numpy as np
+from utils.logging import get_logger
+class StdTextDrawer(object):
+    def __init__(self, config):
+        self.logger = get_logger()
+        self.max_width = config["Global"]["image_width"]
+        self.char_list = " 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+        self.height = config["Global"]["image_height"]
+        self.font_dict = {}
+        self.load_fonts(config["TextDrawer"]["fonts"])
+        self.support_languages = list(self.font_dict)
+    def load_fonts(self, fonts_config):
+        for language in fonts_config:
+            font_path = fonts_config[language]
+            font_height = self.get_valid_height(font_path)
+            font = ImageFont.truetype(font_path, font_height)
+            self.font_dict[language] = font
+    def get_valid_height(self, font_path):
+        font = ImageFont.truetype(font_path, self.height - 4)
+        _, font_height = font.getsize(self.char_list)
+        if font_height <= self.height - 4:
+            return self.height - 4
+        else:
+            return int((self.height - 4)**2 / font_height)
+    def draw_text(self, corpus, language="en", crop=True):
+        if language not in self.support_languages:
+            self.logger.warning(
+                "language {} not supported, use en instead.".format(language))
+            language = "en"
+        if crop:
+            width = min(self.max_width, len(corpus) * self.height) + 4
+        else:
+            width = len(corpus) * self.height + 4
+        bg = Image.new("RGB", (width, self.height), color=(127, 127, 127))
+        draw = ImageDraw.Draw(bg)
+        char_x = 2
+        font = self.font_dict[language]
+        for i, char_i in enumerate(corpus):
+            char_size = font.getsize(char_i)[0]
+            draw.text((char_x, 2), char_i, fill=(0, 0, 0), font=font)
+            char_x += char_size
+            if char_x >= width:
+                corpus = corpus[0:i + 1]
+                self.logger.warning("corpus length exceed limit: {}".format(
+                    corpus))
+                break
+        text_input = np.array(bg).astype(np.uint8)
+        text_input = text_input[:, 0:char_x, :]
+        return corpus, text_input
--- a/tools/style_text_rec/engine/writers.py
+++ b/tools/style_text_rec/engine/writers.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import cv2
+import glob
+from utils.logging import get_logger
+class SimpleWriter(object):
+    def __init__(self, config, tag):
+        self.logger = get_logger()
+        self.output_dir = config["Global"]["output_dir"]
+        self.counter = 0
+        self.label_dict = {}
+        self.tag = tag
+        self.label_file_index = 0
+    def save_image(self, image, text_input_label):
+        image_home = os.path.join(self.output_dir, "images", self.tag)
+        if not os.path.exists(image_home):
+            os.makedirs(image_home)
+        image_path = os.path.join(image_home, "{}.png".format(self.counter))
+        # todo support continue synth
+        cv2.imwrite(image_path, image)
+        self.logger.info("generate image: {}".format(image_path))
+        image_name = os.path.join(self.tag, "{}.png".format(self.counter))
+        self.label_dict[image_name] = text_input_label
+        self.counter += 1
+        if not self.counter % 100:
+            self.save_label()
+    def save_label(self):
+        label_raw = ""
+        label_home = os.path.join(self.output_dir, "label")
+        if not os.path.exists(label_home):
+            os.mkdir(label_home)
+        for image_path in self.label_dict:
+            label = self.label_dict[image_path]
+            label_raw += "{}\t{}\n".format(image_path, label)
+        label_file_path = os.path.join(label_home,
+                                       "{}_label.txt".format(self.tag))
+        with open(label_file_path, "w") as f:
+            f.write(label_raw)
+        self.label_file_index += 1
+    def merge_label(self):
+        label_raw = ""
+        label_file_regex = os.path.join(self.output_dir, "label",
+                                        "*_label.txt")
+        label_file_list = glob.glob(label_file_regex)
+        for label_file_i in label_file_list:
+            with open(label_file_i, "r") as f:
+                label_raw += f.read()
+        label_file_path = os.path.join(self.output_dir, "label.txt")
+        with open(label_file_path, "w") as f:
+            f.write(label_raw)
--- a/tools/style_text_rec/examples/corpus/example.txt
+++ b/tools/style_text_rec/examples/corpus/example.txt
+PaddleOCR
+飞桨文字识别
--- a/tools/style_text_rec/examples/image_list.txt
+++ b/tools/style_text_rec/examples/image_list.txt
+style_images/1.jpg	NEATNESS
+style_images/2.jpg	锁店君和宾馆
--- a/tools/style_text_rec/examples/style_images/1.jpg
+++ b/tools/style_text_rec/examples/style_images/1.jpg
--- a/tools/style_text_rec/examples/style_images/2.jpg
+++ b/tools/style_text_rec/examples/style_images/2.jpg
--- a/tools/style_text_rec/fonts/ch_standard.ttf
+++ b/tools/style_text_rec/fonts/ch_standard.ttf
--- a/tools/style_text_rec/fonts/en_standard.ttf
+++ b/tools/style_text_rec/fonts/en_standard.ttf
--- a/tools/style_text_rec/fonts/ko_standard.ttf
+++ b/tools/style_text_rec/fonts/ko_standard.ttf