Merge remote-tracking branch 'origin/dygraph' into dy1

19eb7eb8 · Leif · 0afe6c32 · 03b7daa5 · 19eb7eb8 · 19eb7eb8
Commit 19eb7eb8 authored Sep 03, 2021 by Leif
20 changed files
--- a/ppocr/modeling/heads/e2e_pg_head.py
+++ b/ppocr/modeling/heads/e2e_pg_head.py
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 padding,
+                 groups=1,
+                 if_act=True,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+        self.conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + '_weights'),
+            bias_attr=False)
+
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(name="bn_" + name + "_scale"),
+            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
+            moving_mean_name="bn_" + name + "_mean",
+            moving_variance_name="bn_" + name + "_variance",
+            use_global_stats=False)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        return x
+
+
+class PGHead(nn.Layer):
+    """
+    """
+
+    def __init__(self, in_channels, **kwargs):
+        super(PGHead, self).__init__()
+        self.conv_f_score1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=64,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            act='relu',
+            name="conv_f_score{}".format(1))
+        self.conv_f_score2 = ConvBNLayer(
+            in_channels=64,
+            out_channels=64,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            act='relu',
+            name="conv_f_score{}".format(2))
+        self.conv_f_score3 = ConvBNLayer(
+            in_channels=64,
+            out_channels=128,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            act='relu',
+            name="conv_f_score{}".format(3))
+
+        self.conv1 = nn.Conv2D(
+            in_channels=128,
+            out_channels=1,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            groups=1,
+            weight_attr=ParamAttr(name="conv_f_score{}".format(4)),
+            bias_attr=False)
+
+        self.conv_f_boder1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=64,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            act='relu',
+            name="conv_f_boder{}".format(1))
+        self.conv_f_boder2 = ConvBNLayer(
+            in_channels=64,
+            out_channels=64,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            act='relu',
+            name="conv_f_boder{}".format(2))
+        self.conv_f_boder3 = ConvBNLayer(
+            in_channels=64,
+            out_channels=128,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            act='relu',
+            name="conv_f_boder{}".format(3))
+        self.conv2 = nn.Conv2D(
+            in_channels=128,
+            out_channels=4,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            groups=1,
+            weight_attr=ParamAttr(name="conv_f_boder{}".format(4)),
+            bias_attr=False)
+        self.conv_f_char1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=128,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            act='relu',
+            name="conv_f_char{}".format(1))
+        self.conv_f_char2 = ConvBNLayer(
+            in_channels=128,
+            out_channels=128,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            act='relu',
+            name="conv_f_char{}".format(2))
+        self.conv_f_char3 = ConvBNLayer(
+            in_channels=128,
+            out_channels=256,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            act='relu',
+            name="conv_f_char{}".format(3))
+        self.conv_f_char4 = ConvBNLayer(
+            in_channels=256,
+            out_channels=256,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            act='relu',
+            name="conv_f_char{}".format(4))
+        self.conv_f_char5 = ConvBNLayer(
+            in_channels=256,
+            out_channels=256,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            act='relu',
+            name="conv_f_char{}".format(5))
+        self.conv3 = nn.Conv2D(
+            in_channels=256,
+            out_channels=37,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            groups=1,
+            weight_attr=ParamAttr(name="conv_f_char{}".format(6)),
+            bias_attr=False)
+
+        self.conv_f_direc1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=64,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            act='relu',
+            name="conv_f_direc{}".format(1))
+        self.conv_f_direc2 = ConvBNLayer(
+            in_channels=64,
+            out_channels=64,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            act='relu',
+            name="conv_f_direc{}".format(2))
+        self.conv_f_direc3 = ConvBNLayer(
+            in_channels=64,
+            out_channels=128,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            act='relu',
+            name="conv_f_direc{}".format(3))
+        self.conv4 = nn.Conv2D(
+            in_channels=128,
+            out_channels=2,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            groups=1,
+            weight_attr=ParamAttr(name="conv_f_direc{}".format(4)),
+            bias_attr=False)
+
+    def forward(self, x, targets=None):
+        f_score = self.conv_f_score1(x)
+        f_score = self.conv_f_score2(f_score)
+        f_score = self.conv_f_score3(f_score)
+        f_score = self.conv1(f_score)
+        f_score = F.sigmoid(f_score)
+
+        # f_border
+        f_border = self.conv_f_boder1(x)
+        f_border = self.conv_f_boder2(f_border)
+        f_border = self.conv_f_boder3(f_border)
+        f_border = self.conv2(f_border)
+
+        f_char = self.conv_f_char1(x)
+        f_char = self.conv_f_char2(f_char)
+        f_char = self.conv_f_char3(f_char)
+        f_char = self.conv_f_char4(f_char)
+        f_char = self.conv_f_char5(f_char)
+        f_char = self.conv3(f_char)
+
+        f_direction = self.conv_f_direc1(x)
+        f_direction = self.conv_f_direc2(f_direction)
+        f_direction = self.conv_f_direc3(f_direction)
+        f_direction = self.conv4(f_direction)
+
+        predicts = {}
+        predicts['f_score'] = f_score
+        predicts['f_border'] = f_border
+        predicts['f_char'] = f_char
+        predicts['f_direction'] = f_direction
+        return predicts
--- a/ppocr/modeling/heads/multiheadAttention.py
+++ b/ppocr/modeling/heads/multiheadAttention.py
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle.nn import Linear
+from paddle.nn.initializer import XavierUniform as xavier_uniform_
+from paddle.nn.initializer import Constant as constant_
+from paddle.nn.initializer import XavierNormal as xavier_normal_
+
+zeros_ = constant_(value=0.)
+ones_ = constant_(value=1.)
+
+
+class MultiheadAttention(nn.Layer):
+    """Allows the model to jointly attend to information
+    from different representation subspaces.
+    See reference: Attention Is All You Need
+
+    .. math::
+        \text{MultiHead}(Q, K, V) = \text{Concat}(head_1,\dots,head_h)W^O
+        \text{where} head_i = \text{Attention}(QW_i^Q, KW_i^K, VW_i^V)
+
+    Args:
+        embed_dim: total dimension of the model
+        num_heads: parallel attention layers, or heads
+
+    """
+
+    def __init__(self,
+                 embed_dim,
+                 num_heads,
+                 dropout=0.,
+                 bias=True,
+                 add_bias_kv=False,
+                 add_zero_attn=False):
+        super(MultiheadAttention, self).__init__()
+        self.embed_dim = embed_dim
+        self.num_heads = num_heads
+        self.dropout = dropout
+        self.head_dim = embed_dim // num_heads
+        assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"
+        self.scaling = self.head_dim**-0.5
+        self.out_proj = Linear(embed_dim, embed_dim, bias_attr=bias)
+        self._reset_parameters()
+        self.conv1 = paddle.nn.Conv2D(
+            in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1))
+        self.conv2 = paddle.nn.Conv2D(
+            in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1))
+        self.conv3 = paddle.nn.Conv2D(
+            in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1))
+
+    def _reset_parameters(self):
+        xavier_uniform_(self.out_proj.weight)
+
+    def forward(self,
+                query,
+                key,
+                value,
+                key_padding_mask=None,
+                incremental_state=None,
+                need_weights=True,
+                static_kv=False,
+                attn_mask=None):
+        """
+        Inputs of forward function
+            query: [target length, batch size, embed dim]
+            key: [sequence length, batch size, embed dim]
+            value: [sequence length, batch size, embed dim]
+            key_padding_mask: if True, mask padding based on batch size
+            incremental_state: if provided, previous time steps are cashed
+            need_weights: output attn_output_weights
+            static_kv: key and value are static
+
+        Outputs of forward function
+            attn_output: [target length, batch size, embed dim]
+            attn_output_weights: [batch size, target length, sequence length]
+        """
+        tgt_len, bsz, embed_dim = query.shape
+        assert embed_dim == self.embed_dim
+        assert list(query.shape) == [tgt_len, bsz, embed_dim]
+        assert key.shape == value.shape
+
+        q = self._in_proj_q(query)
+        k = self._in_proj_k(key)
+        v = self._in_proj_v(value)
+        q *= self.scaling
+
+        q = q.reshape([tgt_len, bsz * self.num_heads, self.head_dim]).transpose(
+            [1, 0, 2])
+        k = k.reshape([-1, bsz * self.num_heads, self.head_dim]).transpose(
+            [1, 0, 2])
+        v = v.reshape([-1, bsz * self.num_heads, self.head_dim]).transpose(
+            [1, 0, 2])
+
+        src_len = k.shape[1]
+
+        if key_padding_mask is not None:
+            assert key_padding_mask.shape[0] == bsz
+            assert key_padding_mask.shape[1] == src_len
+
+        attn_output_weights = paddle.bmm(q, k.transpose([0, 2, 1]))
+        assert list(attn_output_weights.
+                    shape) == [bsz * self.num_heads, tgt_len, src_len]
+
+        if attn_mask is not None:
+            attn_mask = attn_mask.unsqueeze(0)
+            attn_output_weights += attn_mask
+        if key_padding_mask is not None:
+            attn_output_weights = attn_output_weights.reshape(
+                [bsz, self.num_heads, tgt_len, src_len])
+            key = key_padding_mask.unsqueeze(1).unsqueeze(2).astype('float32')
+            y = paddle.full(shape=key.shape, dtype='float32', fill_value='-inf')
+            y = paddle.where(key == 0., key, y)
+            attn_output_weights += y
+            attn_output_weights = attn_output_weights.reshape(
+                [bsz * self.num_heads, tgt_len, src_len])
+
+        attn_output_weights = F.softmax(
+            attn_output_weights.astype('float32'),
+            axis=-1,
+            dtype=paddle.float32 if attn_output_weights.dtype == paddle.float16
+            else attn_output_weights.dtype)
+        attn_output_weights = F.dropout(
+            attn_output_weights, p=self.dropout, training=self.training)
+
+        attn_output = paddle.bmm(attn_output_weights, v)
+        assert list(attn_output.
+                    shape) == [bsz * self.num_heads, tgt_len, self.head_dim]
+        attn_output = attn_output.transpose([1, 0, 2]).reshape(
+            [tgt_len, bsz, embed_dim])
+        attn_output = self.out_proj(attn_output)
+
+        if need_weights:
+            # average attention weights over heads
+            attn_output_weights = attn_output_weights.reshape(
+                [bsz, self.num_heads, tgt_len, src_len])
+            attn_output_weights = attn_output_weights.sum(
+                axis=1) / self.num_heads
+        else:
+            attn_output_weights = None
+        return attn_output, attn_output_weights
+
+    def _in_proj_q(self, query):
+        query = query.transpose([1, 2, 0])
+        query = paddle.unsqueeze(query, axis=2)
+        res = self.conv1(query)
+        res = paddle.squeeze(res, axis=2)
+        res = res.transpose([2, 0, 1])
+        return res
+
+    def _in_proj_k(self, key):
+        key = key.transpose([1, 2, 0])
+        key = paddle.unsqueeze(key, axis=2)
+        res = self.conv2(key)
+        res = paddle.squeeze(res, axis=2)
+        res = res.transpose([2, 0, 1])
+        return res
+
+    def _in_proj_v(self, value):
+        value = value.transpose([1, 2, 0])  #(1, 2, 0)
+        value = paddle.unsqueeze(value, axis=2)
+        res = self.conv3(value)
+        res = paddle.squeeze(res, axis=2)
+        res = res.transpose([2, 0, 1])
+        return res
--- a/ppocr/modeling/heads/rec_ctc_head.py
+++ b/ppocr/modeling/heads/rec_ctc_head.py
@@ -23,32 +23,57 @@ from paddle import ParamAttr, nn
 from paddle.nn import functional as F


-def get_para_bias_attr(l2_decay, k, name):
+def get_para_bias_attr(l2_decay, k):
    regularizer = paddle.regularizer.L2Decay(l2_decay)
    stdv = 1.0 / math.sqrt(k * 1.0)
    initializer = nn.initializer.Uniform(-stdv, stdv)
-    weight_attr = ParamAttr(
-        regularizer=regularizer, initializer=initializer, name=name + "_w_attr")
-    bias_attr = ParamAttr(
-        regularizer=regularizer, initializer=initializer, name=name + "_b_attr")
+    weight_attr = ParamAttr(regularizer=regularizer, initializer=initializer)
+    bias_attr = ParamAttr(regularizer=regularizer, initializer=initializer)
    return [weight_attr, bias_attr]


 class CTCHead(nn.Layer):
-    def __init__(self, in_channels, out_channels, fc_decay=0.0004, **kwargs):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 fc_decay=0.0004,
+                 mid_channels=None,
+                 **kwargs):
        super(CTCHead, self).__init__()
-        weight_attr, bias_attr = get_para_bias_attr(
-            l2_decay=fc_decay, k=in_channels, name='ctc_fc')
-        self.fc = nn.Linear(
-            in_channels,
-            out_channels,
-            weight_attr=weight_attr,
-            bias_attr=bias_attr,
-            name='ctc_fc')
+        if mid_channels is None:
+            weight_attr, bias_attr = get_para_bias_attr(
+                l2_decay=fc_decay, k=in_channels)
+            self.fc = nn.Linear(
+                in_channels,
+                out_channels,
+                weight_attr=weight_attr,
+                bias_attr=bias_attr)
+        else:
+            weight_attr1, bias_attr1 = get_para_bias_attr(
+                l2_decay=fc_decay, k=in_channels)
+            self.fc1 = nn.Linear(
+                in_channels,
+                mid_channels,
+                weight_attr=weight_attr1,
+                bias_attr=bias_attr1)
+
+            weight_attr2, bias_attr2 = get_para_bias_attr(
+                l2_decay=fc_decay, k=mid_channels)
+            self.fc2 = nn.Linear(
+                mid_channels,
+                out_channels,
+                weight_attr=weight_attr2,
+                bias_attr=bias_attr2)
        self.out_channels = out_channels
+        self.mid_channels = mid_channels

-    def forward(self, x, labels=None):
-        predicts = self.fc(x)
+    def forward(self, x, targets=None):
+        if self.mid_channels is None:
+            predicts = self.fc(x)
+        else:
+            predicts = self.fc1(x)
+            predicts = self.fc2(predicts)
+            
        if not self.training:
            predicts = F.softmax(predicts, axis=2)
        return predicts
--- a/ppocr/modeling/heads/rec_nrtr_head.py
+++ b/ppocr/modeling/heads/rec_nrtr_head.py
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import paddle
+import copy
+from paddle import nn
+import paddle.nn.functional as F
+from paddle.nn import LayerList
+from paddle.nn.initializer import XavierNormal as xavier_uniform_
+from paddle.nn import Dropout, Linear, LayerNorm, Conv2D
+import numpy as np
+from ppocr.modeling.heads.multiheadAttention import MultiheadAttention
+from paddle.nn.initializer import Constant as constant_
+from paddle.nn.initializer import XavierNormal as xavier_normal_
+
+zeros_ = constant_(value=0.)
+ones_ = constant_(value=1.)
+
+
+class Transformer(nn.Layer):
+    """A transformer model. User is able to modify the attributes as needed. The architechture
+    is based on the paper "Attention Is All You Need". Ashish Vaswani, Noam Shazeer,
+    Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and
+    Illia Polosukhin. 2017. Attention is all you need. In Advances in Neural Information
+    Processing Systems, pages 6000-6010.
+
+    Args:
+        d_model: the number of expected features in the encoder/decoder inputs (default=512).
+        nhead: the number of heads in the multiheadattention models (default=8).
+        num_encoder_layers: the number of sub-encoder-layers in the encoder (default=6).
+        num_decoder_layers: the number of sub-decoder-layers in the decoder (default=6).
+        dim_feedforward: the dimension of the feedforward network model (default=2048).
+        dropout: the dropout value (default=0.1).
+        custom_encoder: custom encoder (default=None).
+        custom_decoder: custom decoder (default=None).
+
+    """
+
+    def __init__(self,
+                 d_model=512,
+                 nhead=8,
+                 num_encoder_layers=6,
+                 beam_size=0,
+                 num_decoder_layers=6,
+                 dim_feedforward=1024,
+                 attention_dropout_rate=0.0,
+                 residual_dropout_rate=0.1,
+                 custom_encoder=None,
+                 custom_decoder=None,
+                 in_channels=0,
+                 out_channels=0,
+                 dst_vocab_size=99,
+                 scale_embedding=True):
+        super(Transformer, self).__init__()
+        self.embedding = Embeddings(
+            d_model=d_model,
+            vocab=dst_vocab_size,
+            padding_idx=0,
+            scale_embedding=scale_embedding)
+        self.positional_encoding = PositionalEncoding(
+            dropout=residual_dropout_rate,
+            dim=d_model, )
+        if custom_encoder is not None:
+            self.encoder = custom_encoder
+        else:
+            if num_encoder_layers > 0:
+                encoder_layer = TransformerEncoderLayer(
+                    d_model, nhead, dim_feedforward, attention_dropout_rate,
+                    residual_dropout_rate)
+                self.encoder = TransformerEncoder(encoder_layer,
+                                                  num_encoder_layers)
+            else:
+                self.encoder = None
+
+        if custom_decoder is not None:
+            self.decoder = custom_decoder
+        else:
+            decoder_layer = TransformerDecoderLayer(
+                d_model, nhead, dim_feedforward, attention_dropout_rate,
+                residual_dropout_rate)
+            self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers)
+
+        self._reset_parameters()
+        self.beam_size = beam_size
+        self.d_model = d_model
+        self.nhead = nhead
+        self.tgt_word_prj = nn.Linear(d_model, dst_vocab_size, bias_attr=False)
+        w0 = np.random.normal(0.0, d_model**-0.5,
+                              (d_model, dst_vocab_size)).astype(np.float32)
+        self.tgt_word_prj.weight.set_value(w0)
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+
+        if isinstance(m, nn.Conv2D):
+            xavier_normal_(m.weight)
+            if m.bias is not None:
+                zeros_(m.bias)
+
+    def forward_train(self, src, tgt):
+        tgt = tgt[:, :-1]
+
+        tgt_key_padding_mask = self.generate_padding_mask(tgt)
+        tgt = self.embedding(tgt).transpose([1, 0, 2])
+        tgt = self.positional_encoding(tgt)
+        tgt_mask = self.generate_square_subsequent_mask(tgt.shape[0])
+
+        if self.encoder is not None:
+            src = self.positional_encoding(src.transpose([1, 0, 2]))
+            memory = self.encoder(src)
+        else:
+            memory = src.squeeze(2).transpose([2, 0, 1])
+        output = self.decoder(
+            tgt,
+            memory,
+            tgt_mask=tgt_mask,
+            memory_mask=None,
+            tgt_key_padding_mask=tgt_key_padding_mask,
+            memory_key_padding_mask=None)
+        output = output.transpose([1, 0, 2])
+        logit = self.tgt_word_prj(output)
+        return logit
+
+    def forward(self, src, targets=None):
+        """Take in and process masked source/target sequences.
+        Args:
+            src: the sequence to the encoder (required).
+            tgt: the sequence to the decoder (required).
+        Shape:
+            - src: :math:`(S, N, E)`.
+            - tgt: :math:`(T, N, E)`.
+        Examples:
+            >>> output = transformer_model(src, tgt)
+        """
+
+        if self.training:
+            max_len = targets[1].max()
+            tgt = targets[0][:, :2 + max_len]
+            return self.forward_train(src, tgt)
+        else:
+            if self.beam_size > 0:
+                return self.forward_beam(src)
+            else:
+                return self.forward_test(src)
+
+    def forward_test(self, src):
+        bs = src.shape[0]
+        if self.encoder is not None:
+            src = self.positional_encoding(src.transpose([1, 0, 2]))
+            memory = self.encoder(src)
+        else:
+            memory = src.squeeze(2).transpose([2, 0, 1])
+        dec_seq = paddle.full((bs, 1), 2, dtype=paddle.int64)
+        for len_dec_seq in range(1, 25):
+            src_enc = memory.clone()
+            tgt_key_padding_mask = self.generate_padding_mask(dec_seq)
+            dec_seq_embed = self.embedding(dec_seq).transpose([1, 0, 2])
+            dec_seq_embed = self.positional_encoding(dec_seq_embed)
+            tgt_mask = self.generate_square_subsequent_mask(dec_seq_embed.shape[
+                0])
+            output = self.decoder(
+                dec_seq_embed,
+                src_enc,
+                tgt_mask=tgt_mask,
+                memory_mask=None,
+                tgt_key_padding_mask=tgt_key_padding_mask,
+                memory_key_padding_mask=None)
+            dec_output = output.transpose([1, 0, 2])
+
+            dec_output = dec_output[:,
+                                    -1, :]  # Pick the last step: (bh * bm) * d_h
+            word_prob = F.log_softmax(self.tgt_word_prj(dec_output), axis=1)
+            word_prob = word_prob.reshape([1, bs, -1])
+            preds_idx = word_prob.argmax(axis=2)
+
+            if paddle.equal_all(
+                    preds_idx[-1],
+                    paddle.full(
+                        preds_idx[-1].shape, 3, dtype='int64')):
+                break
+
+            preds_prob = word_prob.max(axis=2)
+            dec_seq = paddle.concat(
+                [dec_seq, preds_idx.reshape([-1, 1])], axis=1)
+
+        return dec_seq
+
+    def forward_beam(self, images):
+        ''' Translation work in one batch '''
+
+        def get_inst_idx_to_tensor_position_map(inst_idx_list):
+            ''' Indicate the position of an instance in a tensor. '''
+            return {
+                inst_idx: tensor_position
+                for tensor_position, inst_idx in enumerate(inst_idx_list)
+            }
+
+        def collect_active_part(beamed_tensor, curr_active_inst_idx,
+                                n_prev_active_inst, n_bm):
+            ''' Collect tensor parts associated to active instances. '''
+
+            _, *d_hs = beamed_tensor.shape
+            n_curr_active_inst = len(curr_active_inst_idx)
+            new_shape = (n_curr_active_inst * n_bm, *d_hs)
+
+            beamed_tensor = beamed_tensor.reshape([n_prev_active_inst, -1])
+            beamed_tensor = beamed_tensor.index_select(
+                paddle.to_tensor(curr_active_inst_idx), axis=0)
+            beamed_tensor = beamed_tensor.reshape([*new_shape])
+
+            return beamed_tensor
+
+        def collate_active_info(src_enc, inst_idx_to_position_map,
+                                active_inst_idx_list):
+            # Sentences which are still active are collected,
+            # so the decoder will not run on completed sentences.
+
+            n_prev_active_inst = len(inst_idx_to_position_map)
+            active_inst_idx = [
+                inst_idx_to_position_map[k] for k in active_inst_idx_list
+            ]
+            active_inst_idx = paddle.to_tensor(active_inst_idx, dtype='int64')
+            active_src_enc = collect_active_part(
+                src_enc.transpose([1, 0, 2]), active_inst_idx,
+                n_prev_active_inst, n_bm).transpose([1, 0, 2])
+            active_inst_idx_to_position_map = get_inst_idx_to_tensor_position_map(
+                active_inst_idx_list)
+            return active_src_enc, active_inst_idx_to_position_map
+
+        def beam_decode_step(inst_dec_beams, len_dec_seq, enc_output,
+                             inst_idx_to_position_map, n_bm,
+                             memory_key_padding_mask):
+            ''' Decode and update beam status, and then return active beam idx '''
+
+            def prepare_beam_dec_seq(inst_dec_beams, len_dec_seq):
+                dec_partial_seq = [
+                    b.get_current_state() for b in inst_dec_beams if not b.done
+                ]
+                dec_partial_seq = paddle.stack(dec_partial_seq)
+
+                dec_partial_seq = dec_partial_seq.reshape([-1, len_dec_seq])
+                return dec_partial_seq
+
+            def prepare_beam_memory_key_padding_mask(
+                    inst_dec_beams, memory_key_padding_mask, n_bm):
+                keep = []
+                for idx in (memory_key_padding_mask):
+                    if not inst_dec_beams[idx].done:
+                        keep.append(idx)
+                memory_key_padding_mask = memory_key_padding_mask[
+                    paddle.to_tensor(keep)]
+                len_s = memory_key_padding_mask.shape[-1]
+                n_inst = memory_key_padding_mask.shape[0]
+                memory_key_padding_mask = paddle.concat(
+                    [memory_key_padding_mask for i in range(n_bm)], axis=1)
+                memory_key_padding_mask = memory_key_padding_mask.reshape(
+                    [n_inst * n_bm, len_s])  #repeat(1, n_bm)
+                return memory_key_padding_mask
+
+            def predict_word(dec_seq, enc_output, n_active_inst, n_bm,
+                             memory_key_padding_mask):
+                tgt_key_padding_mask = self.generate_padding_mask(dec_seq)
+                dec_seq = self.embedding(dec_seq).transpose([1, 0, 2])
+                dec_seq = self.positional_encoding(dec_seq)
+                tgt_mask = self.generate_square_subsequent_mask(dec_seq.shape[
+                    0])
+                dec_output = self.decoder(
+                    dec_seq,
+                    enc_output,
+                    tgt_mask=tgt_mask,
+                    tgt_key_padding_mask=tgt_key_padding_mask,
+                    memory_key_padding_mask=memory_key_padding_mask,
+                ).transpose([1, 0, 2])
+                dec_output = dec_output[:,
+                                        -1, :]  # Pick the last step: (bh * bm) * d_h
+                word_prob = F.log_softmax(self.tgt_word_prj(dec_output), axis=1)
+                word_prob = word_prob.reshape([n_active_inst, n_bm, -1])
+                return word_prob
+
+            def collect_active_inst_idx_list(inst_beams, word_prob,
+                                             inst_idx_to_position_map):
+                active_inst_idx_list = []
+                for inst_idx, inst_position in inst_idx_to_position_map.items():
+                    is_inst_complete = inst_beams[inst_idx].advance(word_prob[
+                        inst_position])
+                    if not is_inst_complete:
+                        active_inst_idx_list += [inst_idx]
+
+                return active_inst_idx_list
+
+            n_active_inst = len(inst_idx_to_position_map)
+            dec_seq = prepare_beam_dec_seq(inst_dec_beams, len_dec_seq)
+            memory_key_padding_mask = None
+            word_prob = predict_word(dec_seq, enc_output, n_active_inst, n_bm,
+                                     memory_key_padding_mask)
+            # Update the beam with predicted word prob information and collect incomplete instances
+            active_inst_idx_list = collect_active_inst_idx_list(
+                inst_dec_beams, word_prob, inst_idx_to_position_map)
+            return active_inst_idx_list
+
+        def collect_hypothesis_and_scores(inst_dec_beams, n_best):
+            all_hyp, all_scores = [], []
+            for inst_idx in range(len(inst_dec_beams)):
+                scores, tail_idxs = inst_dec_beams[inst_idx].sort_scores()
+                all_scores += [scores[:n_best]]
+                hyps = [
+                    inst_dec_beams[inst_idx].get_hypothesis(i)
+                    for i in tail_idxs[:n_best]
+                ]
+                all_hyp += [hyps]
+            return all_hyp, all_scores
+
+        with paddle.no_grad():
+            #-- Encode
+
+            if self.encoder is not None:
+                src = self.positional_encoding(images.transpose([1, 0, 2]))
+                src_enc = self.encoder(src).transpose([1, 0, 2])
+            else:
+                src_enc = images.squeeze(2).transpose([0, 2, 1])
+
+            #-- Repeat data for beam search
+            n_bm = self.beam_size
+            n_inst, len_s, d_h = src_enc.shape
+            src_enc = paddle.concat([src_enc for i in range(n_bm)], axis=1)
+            src_enc = src_enc.reshape([n_inst * n_bm, len_s, d_h]).transpose(
+                [1, 0, 2])
+            #-- Prepare beams
+            inst_dec_beams = [Beam(n_bm) for _ in range(n_inst)]
+
+            #-- Bookkeeping for active or not
+            active_inst_idx_list = list(range(n_inst))
+            inst_idx_to_position_map = get_inst_idx_to_tensor_position_map(
+                active_inst_idx_list)
+            #-- Decode
+            for len_dec_seq in range(1, 25):
+                src_enc_copy = src_enc.clone()
+                active_inst_idx_list = beam_decode_step(
+                    inst_dec_beams, len_dec_seq, src_enc_copy,
+                    inst_idx_to_position_map, n_bm, None)
+                if not active_inst_idx_list:
+                    break  # all instances have finished their path to <EOS>
+                src_enc, inst_idx_to_position_map = collate_active_info(
+                    src_enc_copy, inst_idx_to_position_map,
+                    active_inst_idx_list)
+        batch_hyp, batch_scores = collect_hypothesis_and_scores(inst_dec_beams,
+                                                                1)
+        result_hyp = []
+        for bs_hyp in batch_hyp:
+            bs_hyp_pad = bs_hyp[0] + [3] * (25 - len(bs_hyp[0]))
+            result_hyp.append(bs_hyp_pad)
+        return paddle.to_tensor(np.array(result_hyp), dtype=paddle.int64)
+
+    def generate_square_subsequent_mask(self, sz):
+        """Generate a square mask for the sequence. The masked positions are filled with float('-inf').
+            Unmasked positions are filled with float(0.0).
+        """
+        mask = paddle.zeros([sz, sz], dtype='float32')
+        mask_inf = paddle.triu(
+            paddle.full(
+                shape=[sz, sz], dtype='float32', fill_value='-inf'),
+            diagonal=1)
+        mask = mask + mask_inf
+        return mask
+
+    def generate_padding_mask(self, x):
+        padding_mask = x.equal(paddle.to_tensor(0, dtype=x.dtype))
+        return padding_mask
+
+    def _reset_parameters(self):
+        """Initiate parameters in the transformer model."""
+
+        for p in self.parameters():
+            if p.dim() > 1:
+                xavier_uniform_(p)
+
+
+class TransformerEncoder(nn.Layer):
+    """TransformerEncoder is a stack of N encoder layers
+    Args:
+        encoder_layer: an instance of the TransformerEncoderLayer() class (required).
+        num_layers: the number of sub-encoder-layers in the encoder (required).
+        norm: the layer normalization component (optional).
+    """
+
+    def __init__(self, encoder_layer, num_layers):
+        super(TransformerEncoder, self).__init__()
+        self.layers = _get_clones(encoder_layer, num_layers)
+        self.num_layers = num_layers
+
+    def forward(self, src):
+        """Pass the input through the endocder layers in turn.
+        Args:
+            src: the sequnce to the encoder (required).
+            mask: the mask for the src sequence (optional).
+            src_key_padding_mask: the mask for the src keys per batch (optional).
+        """
+        output = src
+
+        for i in range(self.num_layers):
+            output = self.layers[i](output,
+                                    src_mask=None,
+                                    src_key_padding_mask=None)
+
+        return output
+
+
+class TransformerDecoder(nn.Layer):
+    """TransformerDecoder is a stack of N decoder layers
+
+    Args:
+        decoder_layer: an instance of the TransformerDecoderLayer() class (required).
+        num_layers: the number of sub-decoder-layers in the decoder (required).
+        norm: the layer normalization component (optional).
+
+    """
+
+    def __init__(self, decoder_layer, num_layers):
+        super(TransformerDecoder, self).__init__()
+        self.layers = _get_clones(decoder_layer, num_layers)
+        self.num_layers = num_layers
+
+    def forward(self,
+                tgt,
+                memory,
+                tgt_mask=None,
+                memory_mask=None,
+                tgt_key_padding_mask=None,
+                memory_key_padding_mask=None):
+        """Pass the inputs (and mask) through the decoder layer in turn.
+
+        Args:
+            tgt: the sequence to the decoder (required).
+            memory: the sequnce from the last layer of the encoder (required).
+            tgt_mask: the mask for the tgt sequence (optional).
+            memory_mask: the mask for the memory sequence (optional).
+            tgt_key_padding_mask: the mask for the tgt keys per batch (optional).
+            memory_key_padding_mask: the mask for the memory keys per batch (optional).
+        """
+        output = tgt
+        for i in range(self.num_layers):
+            output = self.layers[i](
+                output,
+                memory,
+                tgt_mask=tgt_mask,
+                memory_mask=memory_mask,
+                tgt_key_padding_mask=tgt_key_padding_mask,
+                memory_key_padding_mask=memory_key_padding_mask)
+
+        return output
+
+
+class TransformerEncoderLayer(nn.Layer):
+    """TransformerEncoderLayer is made up of self-attn and feedforward network.
+    This standard encoder layer is based on the paper "Attention Is All You Need".
+    Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
+    Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in
+    Neural Information Processing Systems, pages 6000-6010. Users may modify or implement
+    in a different way during application.
+
+    Args:
+        d_model: the number of expected features in the input (required).
+        nhead: the number of heads in the multiheadattention models (required).
+        dim_feedforward: the dimension of the feedforward network model (default=2048).
+        dropout: the dropout value (default=0.1).
+
+    """
+
+    def __init__(self,
+                 d_model,
+                 nhead,
+                 dim_feedforward=2048,
+                 attention_dropout_rate=0.0,
+                 residual_dropout_rate=0.1):
+        super(TransformerEncoderLayer, self).__init__()
+        self.self_attn = MultiheadAttention(
+            d_model, nhead, dropout=attention_dropout_rate)
+
+        self.conv1 = Conv2D(
+            in_channels=d_model,
+            out_channels=dim_feedforward,
+            kernel_size=(1, 1))
+        self.conv2 = Conv2D(
+            in_channels=dim_feedforward,
+            out_channels=d_model,
+            kernel_size=(1, 1))
+
+        self.norm1 = LayerNorm(d_model)
+        self.norm2 = LayerNorm(d_model)
+        self.dropout1 = Dropout(residual_dropout_rate)
+        self.dropout2 = Dropout(residual_dropout_rate)
+
+    def forward(self, src, src_mask=None, src_key_padding_mask=None):
+        """Pass the input through the endocder layer.
+        Args:
+            src: the sequnce to the encoder layer (required).
+            src_mask: the mask for the src sequence (optional).
+            src_key_padding_mask: the mask for the src keys per batch (optional).
+        """
+        src2 = self.self_attn(
+            src,
+            src,
+            src,
+            attn_mask=src_mask,
+            key_padding_mask=src_key_padding_mask)[0]
+        src = src + self.dropout1(src2)
+        src = self.norm1(src)
+
+        src = src.transpose([1, 2, 0])
+        src = paddle.unsqueeze(src, 2)
+        src2 = self.conv2(F.relu(self.conv1(src)))
+        src2 = paddle.squeeze(src2, 2)
+        src2 = src2.transpose([2, 0, 1])
+        src = paddle.squeeze(src, 2)
+        src = src.transpose([2, 0, 1])
+
+        src = src + self.dropout2(src2)
+        src = self.norm2(src)
+        return src
+
+
+class TransformerDecoderLayer(nn.Layer):
+    """TransformerDecoderLayer is made up of self-attn, multi-head-attn and feedforward network.
+    This standard decoder layer is based on the paper "Attention Is All You Need".
+    Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
+    Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in
+    Neural Information Processing Systems, pages 6000-6010. Users may modify or implement
+    in a different way during application.
+
+    Args:
+        d_model: the number of expected features in the input (required).
+        nhead: the number of heads in the multiheadattention models (required).
+        dim_feedforward: the dimension of the feedforward network model (default=2048).
+        dropout: the dropout value (default=0.1).
+
+    """
+
+    def __init__(self,
+                 d_model,
+                 nhead,
+                 dim_feedforward=2048,
+                 attention_dropout_rate=0.0,
+                 residual_dropout_rate=0.1):
+        super(TransformerDecoderLayer, self).__init__()
+        self.self_attn = MultiheadAttention(
+            d_model, nhead, dropout=attention_dropout_rate)
+        self.multihead_attn = MultiheadAttention(
+            d_model, nhead, dropout=attention_dropout_rate)
+
+        self.conv1 = Conv2D(
+            in_channels=d_model,
+            out_channels=dim_feedforward,
+            kernel_size=(1, 1))
+        self.conv2 = Conv2D(
+            in_channels=dim_feedforward,
+            out_channels=d_model,
+            kernel_size=(1, 1))
+
+        self.norm1 = LayerNorm(d_model)
+        self.norm2 = LayerNorm(d_model)
+        self.norm3 = LayerNorm(d_model)
+        self.dropout1 = Dropout(residual_dropout_rate)
+        self.dropout2 = Dropout(residual_dropout_rate)
+        self.dropout3 = Dropout(residual_dropout_rate)
+
+    def forward(self,
+                tgt,
+                memory,
+                tgt_mask=None,
+                memory_mask=None,
+                tgt_key_padding_mask=None,
+                memory_key_padding_mask=None):
+        """Pass the inputs (and mask) through the decoder layer.
+
+        Args:
+            tgt: the sequence to the decoder layer (required).
+            memory: the sequnce from the last layer of the encoder (required).
+            tgt_mask: the mask for the tgt sequence (optional).
+            memory_mask: the mask for the memory sequence (optional).
+            tgt_key_padding_mask: the mask for the tgt keys per batch (optional).
+            memory_key_padding_mask: the mask for the memory keys per batch (optional).
+
+        """
+        tgt2 = self.self_attn(
+            tgt,
+            tgt,
+            tgt,
+            attn_mask=tgt_mask,
+            key_padding_mask=tgt_key_padding_mask)[0]
+        tgt = tgt + self.dropout1(tgt2)
+        tgt = self.norm1(tgt)
+        tgt2 = self.multihead_attn(
+            tgt,
+            memory,
+            memory,
+            attn_mask=memory_mask,
+            key_padding_mask=memory_key_padding_mask)[0]
+        tgt = tgt + self.dropout2(tgt2)
+        tgt = self.norm2(tgt)
+
+        # default
+        tgt = tgt.transpose([1, 2, 0])
+        tgt = paddle.unsqueeze(tgt, 2)
+        tgt2 = self.conv2(F.relu(self.conv1(tgt)))
+        tgt2 = paddle.squeeze(tgt2, 2)
+        tgt2 = tgt2.transpose([2, 0, 1])
+        tgt = paddle.squeeze(tgt, 2)
+        tgt = tgt.transpose([2, 0, 1])
+
+        tgt = tgt + self.dropout3(tgt2)
+        tgt = self.norm3(tgt)
+        return tgt
+
+
+def _get_clones(module, N):
+    return LayerList([copy.deepcopy(module) for i in range(N)])
+
+
+class PositionalEncoding(nn.Layer):
+    """Inject some information about the relative or absolute position of the tokens
+        in the sequence. The positional encodings have the same dimension as
+        the embeddings, so that the two can be summed. Here, we use sine and cosine
+        functions of different frequencies.
+    .. math::
+        \text{PosEncoder}(pos, 2i) = sin(pos/10000^(2i/d_model))
+        \text{PosEncoder}(pos, 2i+1) = cos(pos/10000^(2i/d_model))
+        \text{where pos is the word position and i is the embed idx)
+    Args:
+        d_model: the embed dim (required).
+        dropout: the dropout value (default=0.1).
+        max_len: the max. length of the incoming sequence (default=5000).
+    Examples:
+        >>> pos_encoder = PositionalEncoding(d_model)
+    """
+
+    def __init__(self, dropout, dim, max_len=5000):
+        super(PositionalEncoding, self).__init__()
+        self.dropout = nn.Dropout(p=dropout)
+
+        pe = paddle.zeros([max_len, dim])
+        position = paddle.arange(0, max_len, dtype=paddle.float32).unsqueeze(1)
+        div_term = paddle.exp(
+            paddle.arange(0, dim, 2).astype('float32') *
+            (-math.log(10000.0) / dim))
+        pe[:, 0::2] = paddle.sin(position * div_term)
+        pe[:, 1::2] = paddle.cos(position * div_term)
+        pe = pe.unsqueeze(0)
+        pe = pe.transpose([1, 0, 2])
+        self.register_buffer('pe', pe)
+
+    def forward(self, x):
+        """Inputs of forward function
+        Args:
+            x: the sequence fed to the positional encoder model (required).
+        Shape:
+            x: [sequence length, batch size, embed dim]
+            output: [sequence length, batch size, embed dim]
+        Examples:
+            >>> output = pos_encoder(x)
+        """
+        x = x + self.pe[:x.shape[0], :]
+        return self.dropout(x)
+
+
+class PositionalEncoding_2d(nn.Layer):
+    """Inject some information about the relative or absolute position of the tokens
+        in the sequence. The positional encodings have the same dimension as
+        the embeddings, so that the two can be summed. Here, we use sine and cosine
+        functions of different frequencies.
+    .. math::
+        \text{PosEncoder}(pos, 2i) = sin(pos/10000^(2i/d_model))
+        \text{PosEncoder}(pos, 2i+1) = cos(pos/10000^(2i/d_model))
+        \text{where pos is the word position and i is the embed idx)
+    Args:
+        d_model: the embed dim (required).
+        dropout: the dropout value (default=0.1).
+        max_len: the max. length of the incoming sequence (default=5000).
+    Examples:
+        >>> pos_encoder = PositionalEncoding(d_model)
+    """
+
+    def __init__(self, dropout, dim, max_len=5000):
+        super(PositionalEncoding_2d, self).__init__()
+        self.dropout = nn.Dropout(p=dropout)
+
+        pe = paddle.zeros([max_len, dim])
+        position = paddle.arange(0, max_len, dtype=paddle.float32).unsqueeze(1)
+        div_term = paddle.exp(
+            paddle.arange(0, dim, 2).astype('float32') *
+            (-math.log(10000.0) / dim))
+        pe[:, 0::2] = paddle.sin(position * div_term)
+        pe[:, 1::2] = paddle.cos(position * div_term)
+        pe = pe.unsqueeze(0).transpose([1, 0, 2])
+        self.register_buffer('pe', pe)
+
+        self.avg_pool_1 = nn.AdaptiveAvgPool2D((1, 1))
+        self.linear1 = nn.Linear(dim, dim)
+        self.linear1.weight.data.fill_(1.)
+        self.avg_pool_2 = nn.AdaptiveAvgPool2D((1, 1))
+        self.linear2 = nn.Linear(dim, dim)
+        self.linear2.weight.data.fill_(1.)
+
+    def forward(self, x):
+        """Inputs of forward function
+        Args:
+            x: the sequence fed to the positional encoder model (required).
+        Shape:
+            x: [sequence length, batch size, embed dim]
+            output: [sequence length, batch size, embed dim]
+        Examples:
+            >>> output = pos_encoder(x)
+        """
+        w_pe = self.pe[:x.shape[-1], :]
+        w1 = self.linear1(self.avg_pool_1(x).squeeze()).unsqueeze(0)
+        w_pe = w_pe * w1
+        w_pe = w_pe.transpose([1, 2, 0])
+        w_pe = w_pe.unsqueeze(2)
+
+        h_pe = self.pe[:x.shape[-2], :]
+        w2 = self.linear2(self.avg_pool_2(x).squeeze()).unsqueeze(0)
+        h_pe = h_pe * w2
+        h_pe = h_pe.transpose([1, 2, 0])
+        h_pe = h_pe.unsqueeze(3)
+
+        x = x + w_pe + h_pe
+        x = x.reshape(
+            [x.shape[0], x.shape[1], x.shape[2] * x.shape[3]]).transpose(
+                [2, 0, 1])
+
+        return self.dropout(x)
+
+
+class Embeddings(nn.Layer):
+    def __init__(self, d_model, vocab, padding_idx, scale_embedding):
+        super(Embeddings, self).__init__()
+        self.embedding = nn.Embedding(vocab, d_model, padding_idx=padding_idx)
+        w0 = np.random.normal(0.0, d_model**-0.5,
+                              (vocab, d_model)).astype(np.float32)
+        self.embedding.weight.set_value(w0)
+        self.d_model = d_model
+        self.scale_embedding = scale_embedding
+
+    def forward(self, x):
+        if self.scale_embedding:
+            x = self.embedding(x)
+            return x * math.sqrt(self.d_model)
+        return self.embedding(x)
+
+
+class Beam():
+    ''' Beam search '''
+
+    def __init__(self, size, device=False):
+
+        self.size = size
+        self._done = False
+        # The score for each translation on the beam.
+        self.scores = paddle.zeros((size, ), dtype=paddle.float32)
+        self.all_scores = []
+        # The backpointers at each time-step.
+        self.prev_ks = []
+        # The outputs at each time-step.
+        self.next_ys = [paddle.full((size, ), 0, dtype=paddle.int64)]
+        self.next_ys[0][0] = 2
+
+    def get_current_state(self):
+        "Get the outputs for the current timestep."
+        return self.get_tentative_hypothesis()
+
+    def get_current_origin(self):
+        "Get the backpointers for the current timestep."
+        return self.prev_ks[-1]
+
+    @property
+    def done(self):
+        return self._done
+
+    def advance(self, word_prob):
+        "Update beam status and check if finished or not."
+        num_words = word_prob.shape[1]
+
+        # Sum the previous scores.
+        if len(self.prev_ks) > 0:
+            beam_lk = word_prob + self.scores.unsqueeze(1).expand_as(word_prob)
+        else:
+            beam_lk = word_prob[0]
+
+        flat_beam_lk = beam_lk.reshape([-1])
+        best_scores, best_scores_id = flat_beam_lk.topk(self.size, 0, True,
+                                                        True)  # 1st sort
+        self.all_scores.append(self.scores)
+        self.scores = best_scores
+        # bestScoresId is flattened as a (beam x word) array,
+        # so we need to calculate which word and beam each score came from
+        prev_k = best_scores_id // num_words
+        self.prev_ks.append(prev_k)
+        self.next_ys.append(best_scores_id - prev_k * num_words)
+        # End condition is when top-of-beam is EOS.
+        if self.next_ys[-1][0] == 3:
+            self._done = True
+            self.all_scores.append(self.scores)
+
+        return self._done
+
+    def sort_scores(self):
+        "Sort the scores."
+        return self.scores, paddle.to_tensor(
+            [i for i in range(self.scores.shape[0])], dtype='int32')
+
+    def get_the_best_score_and_idx(self):
+        "Get the score of the best in the beam."
+        scores, ids = self.sort_scores()
+        return scores[1], ids[1]
+
+    def get_tentative_hypothesis(self):
+        "Get the decoded sequence for the current timestep."
+        if len(self.next_ys) == 1:
+            dec_seq = self.next_ys[0].unsqueeze(1)
+        else:
+            _, keys = self.sort_scores()
+            hyps = [self.get_hypothesis(k) for k in keys]
+            hyps = [[2] + h for h in hyps]
+            dec_seq = paddle.to_tensor(hyps, dtype='int64')
+        return dec_seq
+
+    def get_hypothesis(self, k):
+        """ Walk back to construct the full hypothesis. """
+        hyp = []
+        for j in range(len(self.prev_ks) - 1, -1, -1):
+            hyp.append(self.next_ys[j + 1][k])
+            k = self.prev_ks[j][k]
+        return list(map(lambda x: x.item(), hyp[::-1]))
--- a/ppocr/modeling/heads/rec_srn_head.py
+++ b/ppocr/modeling/heads/rec_srn_head.py
@@ -250,7 +250,8 @@ class SRNHead(nn.Layer):

        self.gsrm.wrap_encoder1.prepare_decoder.emb0 = self.gsrm.wrap_encoder0.prepare_decoder.emb0

-    def forward(self, inputs, others):
+    def forward(self, inputs, targets=None):
+        others = targets[-4:]
        encoder_word_pos = others[0]
        gsrm_word_pos = others[1]
        gsrm_slf_attn_bias1 = others[2]

--- a/ppocr/modeling/heads/self_attention.py
+++ b/ppocr/modeling/heads/self_attention.py
@@ -285,8 +285,7 @@ class PrePostProcessLayer(nn.Layer):
            elif cmd == "n":  # add layer normalization
                self.functors.append(
                    self.add_sublayer(
-                        "layer_norm_%d" % len(
-                            self.sublayers(include_sublayers=False)),
+                        "layer_norm_%d" % len(self.sublayers()),
                        paddle.nn.LayerNorm(
                            normalized_shape=d_model,
                            weight_attr=fluid.ParamAttr(
@@ -320,9 +319,7 @@ class PrepareEncoder(nn.Layer):
        self.src_emb_dim = src_emb_dim
        self.src_max_len = src_max_len
        self.emb = paddle.nn.Embedding(
-            num_embeddings=self.src_max_len,
-            embedding_dim=self.src_emb_dim,
-            sparse=True)
+            num_embeddings=self.src_max_len, embedding_dim=self.src_emb_dim)
        self.dropout_rate = dropout_rate

    def forward(self, src_word, src_pos):

--- a/ppocr/modeling/heads/table_att_head.py
+++ b/ppocr/modeling/heads/table_att_head.py
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+import numpy as np
+
+
+class TableAttentionHead(nn.Layer):
+    def __init__(self, in_channels, hidden_size, loc_type, in_max_len=488, **kwargs):
+        super(TableAttentionHead, self).__init__()
+        self.input_size = in_channels[-1]
+        self.hidden_size = hidden_size
+        self.elem_num = 30
+        self.max_text_length = 100
+        self.max_elem_length = 500
+        self.max_cell_num = 500
+
+        self.structure_attention_cell = AttentionGRUCell(
+            self.input_size, hidden_size, self.elem_num, use_gru=False)
+        self.structure_generator = nn.Linear(hidden_size, self.elem_num)
+        self.loc_type = loc_type
+        self.in_max_len = in_max_len
+        
+        if self.loc_type == 1:
+            self.loc_generator = nn.Linear(hidden_size, 4)
+        else:
+            if self.in_max_len == 640:
+                self.loc_fea_trans = nn.Linear(400, self.max_elem_length+1)
+            elif self.in_max_len == 800:
+                self.loc_fea_trans = nn.Linear(625, self.max_elem_length+1)
+            else:
+                self.loc_fea_trans = nn.Linear(256, self.max_elem_length+1)
+            self.loc_generator = nn.Linear(self.input_size + hidden_size, 4)
+            
+    def _char_to_onehot(self, input_char, onehot_dim):
+        input_ont_hot = F.one_hot(input_char, onehot_dim)
+        return input_ont_hot
+
+    def forward(self, inputs, targets=None):
+        # if and else branch are both needed when you want to assign a variable
+        # if you modify the var in just one branch, then the modification will not work.
+        fea = inputs[-1]
+        if len(fea.shape) == 3:
+            pass
+        else:
+            last_shape = int(np.prod(fea.shape[2:])) # gry added
+            fea = paddle.reshape(fea, [fea.shape[0], fea.shape[1], last_shape])
+            fea = fea.transpose([0, 2, 1])  # (NTC)(batch, width, channels)
+        batch_size = fea.shape[0]
+        
+        hidden = paddle.zeros((batch_size, self.hidden_size))
+        output_hiddens = []
+        if self.training and targets is not None:
+            structure = targets[0]
+            for i in range(self.max_elem_length+1):
+                elem_onehots = self._char_to_onehot(
+                    structure[:, i], onehot_dim=self.elem_num)
+                (outputs, hidden), alpha = self.structure_attention_cell(
+                    hidden, fea, elem_onehots)
+                output_hiddens.append(paddle.unsqueeze(outputs, axis=1))
+            output = paddle.concat(output_hiddens, axis=1)
+            structure_probs = self.structure_generator(output)
+            if self.loc_type == 1:
+                loc_preds = self.loc_generator(output)
+                loc_preds = F.sigmoid(loc_preds)
+            else:
+                loc_fea = fea.transpose([0, 2, 1])
+                loc_fea = self.loc_fea_trans(loc_fea)
+                loc_fea = loc_fea.transpose([0, 2, 1])
+                loc_concat = paddle.concat([output, loc_fea], axis=2)
+                loc_preds = self.loc_generator(loc_concat)
+                loc_preds = F.sigmoid(loc_preds)
+        else:
+            temp_elem = paddle.zeros(shape=[batch_size], dtype="int32")
+            structure_probs = None
+            loc_preds = None
+            elem_onehots = None
+            outputs = None
+            alpha = None
+            max_elem_length = paddle.to_tensor(self.max_elem_length)
+            i = 0
+            while i < max_elem_length+1:
+                elem_onehots = self._char_to_onehot(
+                    temp_elem, onehot_dim=self.elem_num)
+                (outputs, hidden), alpha = self.structure_attention_cell(
+                    hidden, fea, elem_onehots)
+                output_hiddens.append(paddle.unsqueeze(outputs, axis=1))
+                structure_probs_step = self.structure_generator(outputs)
+                temp_elem = structure_probs_step.argmax(axis=1, dtype="int32")
+                i += 1
+                
+            output = paddle.concat(output_hiddens, axis=1)
+            structure_probs = self.structure_generator(output)
+            structure_probs = F.softmax(structure_probs)
+            if self.loc_type == 1:
+                loc_preds = self.loc_generator(output)
+                loc_preds = F.sigmoid(loc_preds)
+            else:
+                loc_fea = fea.transpose([0, 2, 1])
+                loc_fea = self.loc_fea_trans(loc_fea)
+                loc_fea = loc_fea.transpose([0, 2, 1])
+                loc_concat = paddle.concat([output, loc_fea], axis=2)
+                loc_preds = self.loc_generator(loc_concat)
+                loc_preds = F.sigmoid(loc_preds)
+        return {'structure_probs':structure_probs, 'loc_preds':loc_preds}
+
+    
+class AttentionGRUCell(nn.Layer):
+    def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False):
+        super(AttentionGRUCell, self).__init__()
+        self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False)
+        self.h2h = nn.Linear(hidden_size, hidden_size)
+        self.score = nn.Linear(hidden_size, 1, bias_attr=False)
+        self.rnn = nn.GRUCell(
+            input_size=input_size + num_embeddings, hidden_size=hidden_size)
+        self.hidden_size = hidden_size
+
+    def forward(self, prev_hidden, batch_H, char_onehots):
+        batch_H_proj = self.i2h(batch_H)
+        prev_hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden), axis=1)
+        res = paddle.add(batch_H_proj, prev_hidden_proj)
+        res = paddle.tanh(res)
+        e = self.score(res)
+        alpha = F.softmax(e, axis=1)
+        alpha = paddle.transpose(alpha, [0, 2, 1])
+        context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1)
+        concat_context = paddle.concat([context, char_onehots], 1)
+        cur_hidden = self.rnn(concat_context, prev_hidden)
+        return cur_hidden, alpha
+
+
+class AttentionLSTM(nn.Layer):
+    def __init__(self, in_channels, out_channels, hidden_size, **kwargs):
+        super(AttentionLSTM, self).__init__()
+        self.input_size = in_channels
+        self.hidden_size = hidden_size
+        self.num_classes = out_channels
+
+        self.attention_cell = AttentionLSTMCell(
+            in_channels, hidden_size, out_channels, use_gru=False)
+        self.generator = nn.Linear(hidden_size, out_channels)
+
+    def _char_to_onehot(self, input_char, onehot_dim):
+        input_ont_hot = F.one_hot(input_char, onehot_dim)
+        return input_ont_hot
+
+    def forward(self, inputs, targets=None, batch_max_length=25):
+        batch_size = inputs.shape[0]
+        num_steps = batch_max_length
+
+        hidden = (paddle.zeros((batch_size, self.hidden_size)), paddle.zeros(
+            (batch_size, self.hidden_size)))
+        output_hiddens = []
+
+        if targets is not None:
+            for i in range(num_steps):
+                # one-hot vectors for a i-th char
+                char_onehots = self._char_to_onehot(
+                    targets[:, i], onehot_dim=self.num_classes)
+                hidden, alpha = self.attention_cell(hidden, inputs,
+                                                    char_onehots)
+
+                hidden = (hidden[1][0], hidden[1][1])
+                output_hiddens.append(paddle.unsqueeze(hidden[0], axis=1))
+            output = paddle.concat(output_hiddens, axis=1)
+            probs = self.generator(output)
+
+        else:
+            targets = paddle.zeros(shape=[batch_size], dtype="int32")
+            probs = None
+
+            for i in range(num_steps):
+                char_onehots = self._char_to_onehot(
+                    targets, onehot_dim=self.num_classes)
+                hidden, alpha = self.attention_cell(hidden, inputs,
+                                                    char_onehots)
+                probs_step = self.generator(hidden[0])
+                hidden = (hidden[1][0], hidden[1][1])
+                if probs is None:
+                    probs = paddle.unsqueeze(probs_step, axis=1)
+                else:
+                    probs = paddle.concat(
+                        [probs, paddle.unsqueeze(
+                            probs_step, axis=1)], axis=1)
+
+                next_input = probs_step.argmax(axis=1)
+
+                targets = next_input
+
+        return probs
+
+
+class AttentionLSTMCell(nn.Layer):
+    def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False):
+        super(AttentionLSTMCell, self).__init__()
+        self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False)
+        self.h2h = nn.Linear(hidden_size, hidden_size)
+        self.score = nn.Linear(hidden_size, 1, bias_attr=False)
+        if not use_gru:
+            self.rnn = nn.LSTMCell(
+                input_size=input_size + num_embeddings, hidden_size=hidden_size)
+        else:
+            self.rnn = nn.GRUCell(
+                input_size=input_size + num_embeddings, hidden_size=hidden_size)
+
+        self.hidden_size = hidden_size
+
+    def forward(self, prev_hidden, batch_H, char_onehots):
+        batch_H_proj = self.i2h(batch_H)
+        prev_hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden[0]), axis=1)
+        res = paddle.add(batch_H_proj, prev_hidden_proj)
+        res = paddle.tanh(res)
+        e = self.score(res)
+
+        alpha = F.softmax(e, axis=1)
+        alpha = paddle.transpose(alpha, [0, 2, 1])
+        context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1)
+        concat_context = paddle.concat([context, char_onehots], 1)
+        cur_hidden = self.rnn(concat_context, prev_hidden)
+
+        return cur_hidden, alpha
--- a/ppocr/modeling/necks/__init__.py
+++ b/ppocr/modeling/necks/__init__.py
@@ -14,12 +14,15 @@

 __all__ = ['build_neck']

+
 def build_neck(config):
    from .db_fpn import DBFPN
    from .east_fpn import EASTFPN
    from .sast_fpn import SASTFPN
    from .rnn import SequenceEncoder
-    support_dict = ['DBFPN', 'EASTFPN', 'SASTFPN', 'SequenceEncoder']
+    from .pg_fpn import PGFPN
+    from .table_fpn import TableFPN
+    support_dict = ['DBFPN', 'EASTFPN', 'SASTFPN', 'SequenceEncoder', 'PGFPN', 'TableFPN']

    module_name = config.pop('name')
    assert module_name in support_dict, Exception('neck only support {}'.format(

--- a/ppocr/modeling/necks/db_fpn.py
+++ b/ppocr/modeling/necks/db_fpn.py
@@ -32,61 +32,53 @@ class DBFPN(nn.Layer):
            in_channels=in_channels[0],
            out_channels=self.out_channels,
            kernel_size=1,
-            weight_attr=ParamAttr(
-                name='conv2d_51.w_0', initializer=weight_attr),
+            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in3_conv = nn.Conv2D(
            in_channels=in_channels[1],
            out_channels=self.out_channels,
            kernel_size=1,
-            weight_attr=ParamAttr(
-                name='conv2d_50.w_0', initializer=weight_attr),
+            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in4_conv = nn.Conv2D(
            in_channels=in_channels[2],
            out_channels=self.out_channels,
            kernel_size=1,
-            weight_attr=ParamAttr(
-                name='conv2d_49.w_0', initializer=weight_attr),
+            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in5_conv = nn.Conv2D(
            in_channels=in_channels[3],
            out_channels=self.out_channels,
            kernel_size=1,
-            weight_attr=ParamAttr(
-                name='conv2d_48.w_0', initializer=weight_attr),
+            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p5_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
-            weight_attr=ParamAttr(
-                name='conv2d_52.w_0', initializer=weight_attr),
+            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p4_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
-            weight_attr=ParamAttr(
-                name='conv2d_53.w_0', initializer=weight_attr),
+            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p3_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
-            weight_attr=ParamAttr(
-                name='conv2d_54.w_0', initializer=weight_attr),
+            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p2_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
-            weight_attr=ParamAttr(
-                name='conv2d_55.w_0', initializer=weight_attr),
+            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)

    def forward(self, x):

--- a/ppocr/modeling/necks/pg_fpn.py
+++ b/ppocr/modeling/necks/pg_fpn.py
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 groups=1,
+                 is_vd_mode=False,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
+        self.is_vd_mode = is_vd_mode
+        self._pool2d_avg = nn.AvgPool2D(
+            kernel_size=2, stride=2, padding=0, ceil_mode=True)
+        self._conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=(kernel_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        if name == "conv1":
+            bn_name = "bn_" + name
+        else:
+            bn_name = "bn" + name[3:]
+        self._batch_norm = nn.BatchNorm(
+            out_channels,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(bn_name + '_offset'),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance',
+            use_global_stats=False)
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class DeConvBNLayer(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size=4,
+                 stride=2,
+                 padding=1,
+                 groups=1,
+                 if_act=True,
+                 act=None,
+                 name=None):
+        super(DeConvBNLayer, self).__init__()
+
+        self.if_act = if_act
+        self.act = act
+        self.deconv = nn.Conv2DTranspose(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + '_weights'),
+            bias_attr=False)
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(name="bn_" + name + "_scale"),
+            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
+            moving_mean_name="bn_" + name + "_mean",
+            moving_variance_name="bn_" + name + "_variance",
+            use_global_stats=False)
+
+    def forward(self, x):
+        x = self.deconv(x)
+        x = self.bn(x)
+        return x
+
+
+class PGFPN(nn.Layer):
+    def __init__(self, in_channels, **kwargs):
+        super(PGFPN, self).__init__()
+        num_inputs = [2048, 2048, 1024, 512, 256]
+        num_outputs = [256, 256, 192, 192, 128]
+        self.out_channels = 128
+        self.conv_bn_layer_1 = ConvBNLayer(
+            in_channels=3,
+            out_channels=32,
+            kernel_size=3,
+            stride=1,
+            act=None,
+            name='FPN_d1')
+        self.conv_bn_layer_2 = ConvBNLayer(
+            in_channels=64,
+            out_channels=64,
+            kernel_size=3,
+            stride=1,
+            act=None,
+            name='FPN_d2')
+        self.conv_bn_layer_3 = ConvBNLayer(
+            in_channels=256,
+            out_channels=128,
+            kernel_size=3,
+            stride=1,
+            act=None,
+            name='FPN_d3')
+        self.conv_bn_layer_4 = ConvBNLayer(
+            in_channels=32,
+            out_channels=64,
+            kernel_size=3,
+            stride=2,
+            act=None,
+            name='FPN_d4')
+        self.conv_bn_layer_5 = ConvBNLayer(
+            in_channels=64,
+            out_channels=64,
+            kernel_size=3,
+            stride=1,
+            act='relu',
+            name='FPN_d5')
+        self.conv_bn_layer_6 = ConvBNLayer(
+            in_channels=64,
+            out_channels=128,
+            kernel_size=3,
+            stride=2,
+            act=None,
+            name='FPN_d6')
+        self.conv_bn_layer_7 = ConvBNLayer(
+            in_channels=128,
+            out_channels=128,
+            kernel_size=3,
+            stride=1,
+            act='relu',
+            name='FPN_d7')
+        self.conv_bn_layer_8 = ConvBNLayer(
+            in_channels=128,
+            out_channels=128,
+            kernel_size=1,
+            stride=1,
+            act=None,
+            name='FPN_d8')
+
+        self.conv_h0 = ConvBNLayer(
+            in_channels=num_inputs[0],
+            out_channels=num_outputs[0],
+            kernel_size=1,
+            stride=1,
+            act=None,
+            name="conv_h{}".format(0))
+        self.conv_h1 = ConvBNLayer(
+            in_channels=num_inputs[1],
+            out_channels=num_outputs[1],
+            kernel_size=1,
+            stride=1,
+            act=None,
+            name="conv_h{}".format(1))
+        self.conv_h2 = ConvBNLayer(
+            in_channels=num_inputs[2],
+            out_channels=num_outputs[2],
+            kernel_size=1,
+            stride=1,
+            act=None,
+            name="conv_h{}".format(2))
+        self.conv_h3 = ConvBNLayer(
+            in_channels=num_inputs[3],
+            out_channels=num_outputs[3],
+            kernel_size=1,
+            stride=1,
+            act=None,
+            name="conv_h{}".format(3))
+        self.conv_h4 = ConvBNLayer(
+            in_channels=num_inputs[4],
+            out_channels=num_outputs[4],
+            kernel_size=1,
+            stride=1,
+            act=None,
+            name="conv_h{}".format(4))
+
+        self.dconv0 = DeConvBNLayer(
+            in_channels=num_outputs[0],
+            out_channels=num_outputs[0 + 1],
+            name="dconv_{}".format(0))
+        self.dconv1 = DeConvBNLayer(
+            in_channels=num_outputs[1],
+            out_channels=num_outputs[1 + 1],
+            act=None,
+            name="dconv_{}".format(1))
+        self.dconv2 = DeConvBNLayer(
+            in_channels=num_outputs[2],
+            out_channels=num_outputs[2 + 1],
+            act=None,
+            name="dconv_{}".format(2))
+        self.dconv3 = DeConvBNLayer(
+            in_channels=num_outputs[3],
+            out_channels=num_outputs[3 + 1],
+            act=None,
+            name="dconv_{}".format(3))
+        self.conv_g1 = ConvBNLayer(
+            in_channels=num_outputs[1],
+            out_channels=num_outputs[1],
+            kernel_size=3,
+            stride=1,
+            act='relu',
+            name="conv_g{}".format(1))
+        self.conv_g2 = ConvBNLayer(
+            in_channels=num_outputs[2],
+            out_channels=num_outputs[2],
+            kernel_size=3,
+            stride=1,
+            act='relu',
+            name="conv_g{}".format(2))
+        self.conv_g3 = ConvBNLayer(
+            in_channels=num_outputs[3],
+            out_channels=num_outputs[3],
+            kernel_size=3,
+            stride=1,
+            act='relu',
+            name="conv_g{}".format(3))
+        self.conv_g4 = ConvBNLayer(
+            in_channels=num_outputs[4],
+            out_channels=num_outputs[4],
+            kernel_size=3,
+            stride=1,
+            act='relu',
+            name="conv_g{}".format(4))
+        self.convf = ConvBNLayer(
+            in_channels=num_outputs[4],
+            out_channels=num_outputs[4],
+            kernel_size=1,
+            stride=1,
+            act=None,
+            name="conv_f{}".format(4))
+
+    def forward(self, x):
+        c0, c1, c2, c3, c4, c5, c6 = x
+        # FPN_Down_Fusion
+        f = [c0, c1, c2]
+        g = [None, None, None]
+        h = [None, None, None]
+        h[0] = self.conv_bn_layer_1(f[0])
+        h[1] = self.conv_bn_layer_2(f[1])
+        h[2] = self.conv_bn_layer_3(f[2])
+
+        g[0] = self.conv_bn_layer_4(h[0])
+        g[1] = paddle.add(g[0], h[1])
+        g[1] = F.relu(g[1])
+        g[1] = self.conv_bn_layer_5(g[1])
+        g[1] = self.conv_bn_layer_6(g[1])
+
+        g[2] = paddle.add(g[1], h[2])
+        g[2] = F.relu(g[2])
+        g[2] = self.conv_bn_layer_7(g[2])
+        f_down = self.conv_bn_layer_8(g[2])
+
+        # FPN UP Fusion
+        f1 = [c6, c5, c4, c3, c2]
+        g = [None, None, None, None, None]
+        h = [None, None, None, None, None]
+        h[0] = self.conv_h0(f1[0])
+        h[1] = self.conv_h1(f1[1])
+        h[2] = self.conv_h2(f1[2])
+        h[3] = self.conv_h3(f1[3])
+        h[4] = self.conv_h4(f1[4])
+
+        g[0] = self.dconv0(h[0])
+        g[1] = paddle.add(g[0], h[1])
+        g[1] = F.relu(g[1])
+        g[1] = self.conv_g1(g[1])
+        g[1] = self.dconv1(g[1])
+
+        g[2] = paddle.add(g[1], h[2])
+        g[2] = F.relu(g[2])
+        g[2] = self.conv_g2(g[2])
+        g[2] = self.dconv2(g[2])
+
+        g[3] = paddle.add(g[2], h[3])
+        g[3] = F.relu(g[3])
+        g[3] = self.conv_g3(g[3])
+        g[3] = self.dconv3(g[3])
+
+        g[4] = paddle.add(x=g[3], y=h[4])
+        g[4] = F.relu(g[4])
+        g[4] = self.conv_g4(g[4])
+        f_up = self.convf(g[4])
+        f_common = paddle.add(f_down, f_up)
+        f_common = F.relu(f_common)
+        return f_common
--- a/ppocr/modeling/necks/table_fpn.py
+++ b/ppocr/modeling/necks/table_fpn.py
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+
+class TableFPN(nn.Layer):
+    def __init__(self, in_channels, out_channels, **kwargs):
+        super(TableFPN, self).__init__()
+        self.out_channels = 512
+        weight_attr = paddle.nn.initializer.KaimingUniform()
+        self.in2_conv = nn.Conv2D(
+            in_channels=in_channels[0],
+            out_channels=self.out_channels,
+            kernel_size=1,
+            weight_attr=ParamAttr(initializer=weight_attr),
+            bias_attr=False)
+        self.in3_conv = nn.Conv2D(
+            in_channels=in_channels[1],
+            out_channels=self.out_channels,
+            kernel_size=1,
+            stride = 1,
+            weight_attr=ParamAttr(initializer=weight_attr),
+            bias_attr=False)
+        self.in4_conv = nn.Conv2D(
+            in_channels=in_channels[2],
+            out_channels=self.out_channels,
+            kernel_size=1,
+            weight_attr=ParamAttr(initializer=weight_attr),
+            bias_attr=False)
+        self.in5_conv = nn.Conv2D(
+            in_channels=in_channels[3],
+            out_channels=self.out_channels,
+            kernel_size=1,
+            weight_attr=ParamAttr(initializer=weight_attr),
+            bias_attr=False)
+        self.p5_conv = nn.Conv2D(
+            in_channels=self.out_channels,
+            out_channels=self.out_channels // 4,
+            kernel_size=3,
+            padding=1,
+            weight_attr=ParamAttr(initializer=weight_attr),
+            bias_attr=False)
+        self.p4_conv = nn.Conv2D(
+            in_channels=self.out_channels,
+            out_channels=self.out_channels // 4,
+            kernel_size=3,
+            padding=1,
+            weight_attr=ParamAttr(initializer=weight_attr),
+            bias_attr=False)
+        self.p3_conv = nn.Conv2D(
+            in_channels=self.out_channels,
+            out_channels=self.out_channels // 4,
+            kernel_size=3,
+            padding=1,
+            weight_attr=ParamAttr(initializer=weight_attr),
+            bias_attr=False)
+        self.p2_conv = nn.Conv2D(
+            in_channels=self.out_channels,
+            out_channels=self.out_channels // 4,
+            kernel_size=3,
+            padding=1,
+            weight_attr=ParamAttr(initializer=weight_attr),
+            bias_attr=False)
+        self.fuse_conv = nn.Conv2D(
+            in_channels=self.out_channels * 4,
+            out_channels=512,
+            kernel_size=3,
+            padding=1,
+            weight_attr=ParamAttr(initializer=weight_attr), bias_attr=False)
+
+    def forward(self, x):
+        c2, c3, c4, c5 = x
+
+        in5 = self.in5_conv(c5)
+        in4 = self.in4_conv(c4)
+        in3 = self.in3_conv(c3)
+        in2 = self.in2_conv(c2)
+
+        out4 = in4 + F.upsample(
+            in5, size=in4.shape[2:4], mode="nearest", align_mode=1)  # 1/16
+        out3 = in3 + F.upsample(
+            out4, size=in3.shape[2:4], mode="nearest", align_mode=1)  # 1/8
+        out2 = in2 + F.upsample(
+            out3, size=in2.shape[2:4], mode="nearest", align_mode=1)  # 1/4
+
+        p4 = F.upsample(out4, size=in5.shape[2:4], mode="nearest", align_mode=1)
+        p3 = F.upsample(out3, size=in5.shape[2:4], mode="nearest", align_mode=1)
+        p2 = F.upsample(out2, size=in5.shape[2:4], mode="nearest", align_mode=1)
+        fuse = paddle.concat([in5, p4, p3, p2], axis=1)
+        fuse_conv = self.fuse_conv(fuse) * 0.005
+        return [c5 + fuse_conv]
--- a/ppocr/modeling/transforms/tps.py
+++ b/ppocr/modeling/transforms/tps.py
@@ -230,15 +230,8 @@ class GridGenerator(nn.Layer):
    def build_inv_delta_C_paddle(self, C):
        """ Return inv_delta_C which is needed to calculate T """
        F = self.F
-        hat_C = paddle.zeros((F, F), dtype='float64')  # F x F
-        for i in range(0, F):
-            for j in range(i, F):
-                if i == j:
-                    hat_C[i, j] = 1
-                else:
-                    r = paddle.norm(C[i] - C[j])
-                    hat_C[i, j] = r
-                    hat_C[j, i] = r
+        hat_eye = paddle.eye(F, dtype='float64')  # F x F
+        hat_C = paddle.norm(C.reshape([1, F, 2]) - C.reshape([F, 1, 2]), axis=2) + hat_eye
        hat_C = (hat_C**2) * paddle.log(hat_C)
        delta_C = paddle.concat(  # F+3 x F+3
            [

--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
@@ -21,17 +21,19 @@ import copy

 __all__ = ['build_post_process']

+from .db_postprocess import DBPostProcess, DistillationDBPostProcess
+from .east_postprocess import EASTPostProcess
+from .sast_postprocess import SASTPostProcess
+from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, NRTRLabelDecode, \
+    TableLabelDecode
+from .cls_postprocess import ClsPostProcess
+from .pg_postprocess import PGPostProcess

 def build_post_process(config, global_config=None):
-    from .db_postprocess import DBPostProcess
-    from .east_postprocess import EASTPostProcess
-    from .sast_postprocess import SASTPostProcess
-    from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode
-    from .cls_postprocess import ClsPostProcess
-
    support_dict = [
        'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode',
-        'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode'
+        'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess',
+        'DistillationCTCLabelDecode', 'NRTRLabelDecode', 'TableLabelDecode', 'DistillationDBPostProcess'
    ]

    config = copy.deepcopy(config)

--- a/ppocr/postprocess/db_postprocess.py
+++ b/ppocr/postprocess/db_postprocess.py
@@ -34,12 +34,18 @@ class DBPostProcess(object):
                 max_candidates=1000,
                 unclip_ratio=2.0,
                 use_dilation=False,
+                 score_mode="fast",
                 **kwargs):
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
        self.min_size = 3
+        self.score_mode = score_mode
+        assert score_mode in [
+            "slow", "fast"
+        ], "Score mode must be in [slow, fast] but got: {}".format(score_mode)
+
        self.dilation_kernel = None if not use_dilation else np.array(
            [[1, 1], [1, 1]])

@@ -69,7 +75,10 @@ class DBPostProcess(object):
            if sside < self.min_size:
                continue
            points = np.array(points)
-            score = self.box_score_fast(pred, points.reshape(-1, 2))
+            if self.score_mode == "fast":
+                score = self.box_score_fast(pred, points.reshape(-1, 2))
+            else:
+                score = self.box_score_slow(pred, contour)
            if self.box_thresh > score:
                continue

@@ -120,6 +129,9 @@ class DBPostProcess(object):
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
+        '''
+        box_score_fast: use bbox mean score as the mean score
+        '''
        h, w = bitmap.shape[:2]
        box = _box.copy()
        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1)
@@ -133,6 +145,27 @@ class DBPostProcess(object):
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

+    def box_score_slow(self, bitmap, contour):
+        '''
+        box_score_slow: use polyon mean score as the mean score
+        '''
+        h, w = bitmap.shape[:2]
+        contour = contour.copy()
+        contour = np.reshape(contour, (-1, 2))
+
+        xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
+        xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
+        ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
+        ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)
+
+        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
+
+        contour[:, 0] = contour[:, 0] - xmin
+        contour[:, 1] = contour[:, 1] - ymin
+
+        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1)
+        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
+
    def __call__(self, outs_dict, shape_list):
        pred = outs_dict['maps']
        if isinstance(pred, paddle.Tensor):
@@ -154,3 +187,29 @@ class DBPostProcess(object):

            boxes_batch.append({'points': boxes})
        return boxes_batch
+
+
+class DistillationDBPostProcess(object):
+    def __init__(self, model_name=["student"],
+                 key=None,
+                 thresh=0.3,
+                 box_thresh=0.6,
+                 max_candidates=1000,
+                 unclip_ratio=1.5,
+                 use_dilation=False,
+                 score_mode="fast",
+                 **kwargs):
+        self.model_name = model_name
+        self.key = key
+        self.post_process = DBPostProcess(thresh=thresh,
+                 box_thresh=box_thresh,
+                 max_candidates=max_candidates,
+                 unclip_ratio=unclip_ratio,
+                 use_dilation=use_dilation,
+                 score_mode=score_mode)
+
+    def __call__(self, predicts, shape_list):
+        results = {}
+        for k in self.model_name:
+            results[k] = self.post_process(predicts[k], shape_list=shape_list)
+        return results
--- a/ppocr/postprocess/pg_postprocess.py
+++ b/ppocr/postprocess/pg_postprocess.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+
+__dir__ = os.path.dirname(__file__)
+sys.path.append(__dir__)
+sys.path.append(os.path.join(__dir__, '..'))
+from ppocr.utils.e2e_utils.pgnet_pp_utils import PGNet_PostProcess
+
+
+class PGPostProcess(object):
+    """
+    The post process for PGNet.
+    """
+
+    def __init__(self, character_dict_path, valid_set, score_thresh, mode,
+                 **kwargs):
+        self.character_dict_path = character_dict_path
+        self.valid_set = valid_set
+        self.score_thresh = score_thresh
+        self.mode = mode
+
+        # c++ la-nms is faster, but only support python 3.5
+        self.is_python35 = False
+        if sys.version_info.major == 3 and sys.version_info.minor == 5:
+            self.is_python35 = True
+
+    def __call__(self, outs_dict, shape_list):
+        post = PGNet_PostProcess(self.character_dict_path, self.valid_set,
+                                 self.score_thresh, outs_dict, shape_list)
+        if self.mode == 'fast':
+            data = post.pg_postprocess_fast()
+        else:
+            data = post.pg_postprocess_slow()
+        return data
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -28,7 +28,7 @@ class BaseRecLabelDecode(object):
            'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
            'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc',
            'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr',
-            'ne', 'EN'
+            'ne', 'EN', 'latin', 'arabic', 'cyrillic', 'devanagari'
        ]
        assert character_type in support_character_type, "Only {} are supported now but get {}".format(
            support_character_type, character_type)
@@ -44,16 +44,16 @@ class BaseRecLabelDecode(object):
            self.character_str = string.printable[:-6]
            dict_character = list(self.character_str)
        elif character_type in support_character_type:
-            self.character_str = ""
+            self.character_str = []
            assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format(
                character_type)
            with open(character_dict_path, "rb") as fin:
                lines = fin.readlines()
                for line in lines:
                    line = line.decode('utf-8').strip("\n").strip("\r\n")
-                    self.character_str += line
+                    self.character_str.append(line)
            if use_space_char:
-                self.character_str += " "
+                self.character_str.append(" ")
            dict_character = list(self.character_str)

        else:
@@ -125,6 +125,100 @@ class CTCLabelDecode(BaseRecLabelDecode):
        return dict_character


+class DistillationCTCLabelDecode(CTCLabelDecode):
+    """
+    Convert 
+    Convert between text-label and text-index
+    """
+
+    def __init__(self,
+                 character_dict_path=None,
+                 character_type='ch',
+                 use_space_char=False,
+                 model_name=["student"],
+                 key=None,
+                 **kwargs):
+        super(DistillationCTCLabelDecode, self).__init__(
+            character_dict_path, character_type, use_space_char)
+        if not isinstance(model_name, list):
+            model_name = [model_name]
+        self.model_name = model_name
+
+        self.key = key
+
+    def __call__(self, preds, label=None, *args, **kwargs):
+        output = dict()
+        for name in self.model_name:
+            pred = preds[name]
+            if self.key is not None:
+                pred = pred[self.key]
+            output[name] = super().__call__(pred, label=label, *args, **kwargs)
+        return output
+
+
+class NRTRLabelDecode(BaseRecLabelDecode):
+    """ Convert between text-label and text-index """
+
+    def __init__(self,
+                 character_dict_path=None,
+                 character_type='EN_symbol',
+                 use_space_char=True,
+                 **kwargs):
+        super(NRTRLabelDecode, self).__init__(character_dict_path,
+                                             character_type, use_space_char)
+
+    def __call__(self, preds, label=None, *args, **kwargs):
+        if preds.dtype == paddle.int64:
+            if isinstance(preds, paddle.Tensor):
+                preds = preds.numpy()
+            if preds[0][0]==2:
+                preds_idx = preds[:,1:]
+            else:
+                preds_idx = preds
+
+            text = self.decode(preds_idx)
+            if label is None:
+                return text
+            label = self.decode(label[:,1:])
+        else:
+            if isinstance(preds, paddle.Tensor):
+                preds = preds.numpy()
+            preds_idx = preds.argmax(axis=2)
+            preds_prob = preds.max(axis=2)
+            text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
+            if label is None:
+                return text
+            label = self.decode(label[:,1:])
+        return text, label
+
+    def add_special_char(self, dict_character):
+        dict_character = ['blank','<unk>','<s>','</s>'] + dict_character
+        return dict_character
+    
+    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
+        """ convert text-index into text-label. """
+        result_list = []
+        batch_size = len(text_index)
+        for batch_idx in range(batch_size):
+            char_list = []
+            conf_list = []
+            for idx in range(len(text_index[batch_idx])):
+                if text_index[batch_idx][idx] == 3: # end
+                    break
+                try:
+                    char_list.append(self.character[int(text_index[batch_idx][idx])])
+                except:
+                    continue
+                if text_prob is not None:
+                    conf_list.append(text_prob[batch_idx][idx])
+                else:
+                    conf_list.append(1)
+            text = ''.join(char_list)
+            result_list.append((text.lower(), np.mean(conf_list)))
+        return result_list
+
+
+
 class AttnLabelDecode(BaseRecLabelDecode):
    """ Convert between text-label and text-index """

@@ -162,8 +256,7 @@ class AttnLabelDecode(BaseRecLabelDecode):
                    if idx > 0 and text_index[batch_idx][idx - 1] == text_index[
                            batch_idx][idx]:
                        continue
-                char_list.append(self.character[int(text_index[batch_idx][
-                    idx])])
+                char_list.append(self.character[int(text_index[batch_idx][idx])])
                if text_prob is not None:
                    conf_list.append(text_prob[batch_idx][idx])
                else:
@@ -218,6 +311,7 @@ class SRNLabelDecode(BaseRecLabelDecode):
                 **kwargs):
        super(SRNLabelDecode, self).__init__(character_dict_path,
                                             character_type, use_space_char)
+        self.max_text_length = kwargs.get('max_text_length', 25)

    def __call__(self, preds, label=None, *args, **kwargs):
        pred = preds['predict']
@@ -229,9 +323,9 @@ class SRNLabelDecode(BaseRecLabelDecode):
        preds_idx = np.argmax(pred, axis=1)
        preds_prob = np.max(pred, axis=1)

-        preds_idx = np.reshape(preds_idx, [-1, 25])
+        preds_idx = np.reshape(preds_idx, [-1, self.max_text_length])

-        preds_prob = np.reshape(preds_prob, [-1, 25])
+        preds_prob = np.reshape(preds_prob, [-1, self.max_text_length])

        text = self.decode(preds_idx, preds_prob)

@@ -287,3 +381,138 @@ class SRNLabelDecode(BaseRecLabelDecode):
            assert False, "unsupport type %s in get_beg_end_flag_idx" \
                          % beg_or_end
        return idx
+
+
+class TableLabelDecode(object):
+    """  """
+
+    def __init__(self,
+                 character_dict_path,
+                 **kwargs):
+        list_character, list_elem = self.load_char_elem_dict(character_dict_path)
+        list_character = self.add_special_char(list_character)
+        list_elem = self.add_special_char(list_elem)
+        self.dict_character = {}
+        self.dict_idx_character = {}
+        for i, char in enumerate(list_character):
+            self.dict_idx_character[i] = char
+            self.dict_character[char] = i
+        self.dict_elem = {}
+        self.dict_idx_elem = {}
+        for i, elem in enumerate(list_elem):
+            self.dict_idx_elem[i] = elem
+            self.dict_elem[elem] = i
+
+    def load_char_elem_dict(self, character_dict_path):
+        list_character = []
+        list_elem = []
+        with open(character_dict_path, "rb") as fin:
+            lines = fin.readlines()
+            substr = lines[0].decode('utf-8').strip("\n").strip("\r\n").split("\t")
+            character_num = int(substr[0])
+            elem_num = int(substr[1])
+            for cno in range(1, 1 + character_num):
+                character = lines[cno].decode('utf-8').strip("\n").strip("\r\n")
+                list_character.append(character)
+            for eno in range(1 + character_num, 1 + character_num + elem_num):
+                elem = lines[eno].decode('utf-8').strip("\n").strip("\r\n")
+                list_elem.append(elem)
+        return list_character, list_elem
+
+    def add_special_char(self, list_character):
+        self.beg_str = "sos"
+        self.end_str = "eos"
+        list_character = [self.beg_str] + list_character + [self.end_str]
+        return list_character
+
+    def __call__(self, preds):
+        structure_probs = preds['structure_probs']
+        loc_preds = preds['loc_preds']
+        if isinstance(structure_probs,paddle.Tensor):
+            structure_probs = structure_probs.numpy()
+        if isinstance(loc_preds,paddle.Tensor):
+            loc_preds = loc_preds.numpy()
+        structure_idx = structure_probs.argmax(axis=2)
+        structure_probs = structure_probs.max(axis=2)
+        structure_str, structure_pos, result_score_list, result_elem_idx_list = self.decode(structure_idx,
+                                                                                            structure_probs, 'elem')
+        res_html_code_list = []
+        res_loc_list = []
+        batch_num = len(structure_str)
+        for bno in range(batch_num):
+            res_loc = []
+            for sno in range(len(structure_str[bno])):
+                text = structure_str[bno][sno]
+                if text in ['<td>', '<td']:
+                    pos = structure_pos[bno][sno]
+                    res_loc.append(loc_preds[bno, pos])
+            res_html_code = ''.join(structure_str[bno])
+            res_loc = np.array(res_loc)
+            res_html_code_list.append(res_html_code)
+            res_loc_list.append(res_loc)
+        return {'res_html_code': res_html_code_list, 'res_loc': res_loc_list, 'res_score_list': result_score_list,
+                'res_elem_idx_list': result_elem_idx_list,'structure_str_list':structure_str}
+
+    def decode(self, text_index, structure_probs, char_or_elem):
+        """convert text-label into text-index.
+        """
+        if char_or_elem == "char":
+            current_dict = self.dict_idx_character
+        else:
+            current_dict = self.dict_idx_elem
+            ignored_tokens = self.get_ignored_tokens('elem')
+            beg_idx, end_idx = ignored_tokens
+
+        result_list = []
+        result_pos_list = []
+        result_score_list = []
+        result_elem_idx_list = []
+        batch_size = len(text_index)
+        for batch_idx in range(batch_size):
+            char_list = []
+            elem_pos_list = []
+            elem_idx_list = []
+            score_list = []
+            for idx in range(len(text_index[batch_idx])):
+                tmp_elem_idx = int(text_index[batch_idx][idx])
+                if idx > 0 and tmp_elem_idx == end_idx:
+                    break
+                if tmp_elem_idx in ignored_tokens:
+                    continue
+
+                char_list.append(current_dict[tmp_elem_idx])
+                elem_pos_list.append(idx)
+                score_list.append(structure_probs[batch_idx, idx])
+                elem_idx_list.append(tmp_elem_idx)
+            result_list.append(char_list)
+            result_pos_list.append(elem_pos_list)
+            result_score_list.append(score_list)
+            result_elem_idx_list.append(elem_idx_list)
+        return result_list, result_pos_list, result_score_list, result_elem_idx_list
+
+    def get_ignored_tokens(self, char_or_elem):
+        beg_idx = self.get_beg_end_flag_idx("beg", char_or_elem)
+        end_idx = self.get_beg_end_flag_idx("end", char_or_elem)
+        return [beg_idx, end_idx]
+
+    def get_beg_end_flag_idx(self, beg_or_end, char_or_elem):
+        if char_or_elem == "char":
+            if beg_or_end == "beg":
+                idx = self.dict_character[self.beg_str]
+            elif beg_or_end == "end":
+                idx = self.dict_character[self.end_str]
+            else:
+                assert False, "Unsupport type %s in get_beg_end_flag_idx of char" \
+                              % beg_or_end
+        elif char_or_elem == "elem":
+            if beg_or_end == "beg":
+                idx = self.dict_elem[self.beg_str]
+            elif beg_or_end == "end":
+                idx = self.dict_elem[self.end_str]
+            else:
+                assert False, "Unsupport type %s in get_beg_end_flag_idx of elem" \
+                              % beg_or_end
+        else:
+            assert False, "Unsupport type %s in char_or_elem" \
+                          % char_or_elem
+        return idx
--- a/ppocr/postprocess/sast_postprocess.py
+++ b/ppocr/postprocess/sast_postprocess.py
@@ -18,6 +18,7 @@ from __future__ import print_function

 import os
 import sys
+
 __dir__ = os.path.dirname(__file__)
 sys.path.append(__dir__)
 sys.path.append(os.path.join(__dir__, '..'))
@@ -49,12 +50,12 @@ class SASTPostProcess(object):
        self.shrink_ratio_of_width = shrink_ratio_of_width
        self.expand_scale = expand_scale
        self.tcl_map_thresh = tcl_map_thresh
-        
+
        # c++ la-nms is faster, but only support python 3.5
        self.is_python35 = False
        if sys.version_info.major == 3 and sys.version_info.minor == 5:
            self.is_python35 = True
-            
+
    def point_pair2poly(self, point_pair_list):
        """
        Transfer vertical point_pairs into poly point in clockwise.
@@ -66,31 +67,42 @@ class SASTPostProcess(object):
            point_list[idx] = point_pair[0]
            point_list[point_num - 1 - idx] = point_pair[1]
        return np.array(point_list).reshape(-1, 2)
-    
-    def shrink_quad_along_width(self, quad, begin_width_ratio=0., end_width_ratio=1.):
+
+    def shrink_quad_along_width(self,
+                                quad,
+                                begin_width_ratio=0.,
+                                end_width_ratio=1.):
        """ 
        Generate shrink_quad_along_width.
        """
-        ratio_pair = np.array([[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
+        ratio_pair = np.array(
+            [[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
        p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair
        p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair
        return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]])
-    
+
    def expand_poly_along_width(self, poly, shrink_ratio_of_width=0.3):
        """
        expand poly along width.
        """
        point_num = poly.shape[0]
-        left_quad = np.array([poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32)
+        left_quad = np.array(
+            [poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32)
        left_ratio = -shrink_ratio_of_width * np.linalg.norm(left_quad[0] - left_quad[3]) / \
-                    (np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6)
-        left_quad_expand = self.shrink_quad_along_width(left_quad, left_ratio, 1.0)
-        right_quad = np.array([poly[point_num // 2 - 2], poly[point_num // 2 - 1],
-                            poly[point_num // 2], poly[point_num // 2 + 1]], dtype=np.float32)
+                     (np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6)
+        left_quad_expand = self.shrink_quad_along_width(left_quad, left_ratio,
+                                                        1.0)
+        right_quad = np.array(
+            [
+                poly[point_num // 2 - 2], poly[point_num // 2 - 1],
+                poly[point_num // 2], poly[point_num // 2 + 1]
+            ],
+            dtype=np.float32)
        right_ratio = 1.0 + \
-                    shrink_ratio_of_width * np.linalg.norm(right_quad[0] - right_quad[3]) / \
-                    (np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6)
-        right_quad_expand = self.shrink_quad_along_width(right_quad, 0.0, right_ratio)
+                      shrink_ratio_of_width * np.linalg.norm(right_quad[0] - right_quad[3]) / \
+                      (np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6)
+        right_quad_expand = self.shrink_quad_along_width(right_quad, 0.0,
+                                                         right_ratio)
        poly[0] = left_quad_expand[0]
        poly[-1] = left_quad_expand[-1]
        poly[point_num // 2 - 1] = right_quad_expand[1]
@@ -100,7 +112,7 @@ class SASTPostProcess(object):
    def restore_quad(self, tcl_map, tcl_map_thresh, tvo_map):
        """Restore quad."""
        xy_text = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh)
-        xy_text = xy_text[:, ::-1] # (n, 2)
+        xy_text = xy_text[:, ::-1]  # (n, 2)

        # Sort the text boxes via the y axis
        xy_text = xy_text[np.argsort(xy_text[:, 1])]
@@ -112,7 +124,7 @@ class SASTPostProcess(object):
        point_num = int(tvo_map.shape[-1] / 2)
        assert point_num == 4
        tvo_map = tvo_map[xy_text[:, 1], xy_text[:, 0], :]
-        xy_text_tile = np.tile(xy_text, (1, point_num)) # (n, point_num * 2)
+        xy_text_tile = np.tile(xy_text, (1, point_num))  # (n, point_num * 2)
        quads = xy_text_tile - tvo_map

        return scores, quads, xy_text
@@ -121,14 +133,12 @@ class SASTPostProcess(object):
        """
        compute area of a quad.
        """
-        edge = [
-            (quad[1][0] - quad[0][0]) * (quad[1][1] + quad[0][1]),
-            (quad[2][0] - quad[1][0]) * (quad[2][1] + quad[1][1]),
-            (quad[3][0] - quad[2][0]) * (quad[3][1] + quad[2][1]),
-            (quad[0][0] - quad[3][0]) * (quad[0][1] + quad[3][1])
-        ]
+        edge = [(quad[1][0] - quad[0][0]) * (quad[1][1] + quad[0][1]),
+                (quad[2][0] - quad[1][0]) * (quad[2][1] + quad[1][1]),
+                (quad[3][0] - quad[2][0]) * (quad[3][1] + quad[2][1]),
+                (quad[0][0] - quad[3][0]) * (quad[0][1] + quad[3][1])]
        return np.sum(edge) / 2.
-        
+
    def nms(self, dets):
        if self.is_python35:
            import lanms
@@ -141,7 +151,7 @@ class SASTPostProcess(object):
        """
        Cluster pixels in tcl_map based on quads.
        """
-        instance_count = quads.shape[0] + 1 # contain background
+        instance_count = quads.shape[0] + 1  # contain background
        instance_label_map = np.zeros(tcl_map.shape[:2], dtype=np.int32)
        if instance_count == 1:
            return instance_count, instance_label_map
@@ -149,18 +159,19 @@ class SASTPostProcess(object):
        # predict text center
        xy_text = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh)
        n = xy_text.shape[0]
-        xy_text = xy_text[:, ::-1] # (n, 2)
-        tco = tco_map[xy_text[:, 1], xy_text[:, 0], :] # (n, 2)
+        xy_text = xy_text[:, ::-1]  # (n, 2)
+        tco = tco_map[xy_text[:, 1], xy_text[:, 0], :]  # (n, 2)
        pred_tc = xy_text - tco
-        
+
        # get gt text center
        m = quads.shape[0]
-        gt_tc = np.mean(quads, axis=1) # (m, 2)
+        gt_tc = np.mean(quads, axis=1)  # (m, 2)

-        pred_tc_tile = np.tile(pred_tc[:, np.newaxis, :], (1, m, 1)) # (n, m, 2)
-        gt_tc_tile = np.tile(gt_tc[np.newaxis, :, :], (n, 1, 1)) # (n, m, 2)
-        dist_mat = np.linalg.norm(pred_tc_tile - gt_tc_tile, axis=2) # (n, m)
-        xy_text_assign = np.argmin(dist_mat, axis=1) + 1 # (n,)
+        pred_tc_tile = np.tile(pred_tc[:, np.newaxis, :],
+                               (1, m, 1))  # (n, m, 2)
+        gt_tc_tile = np.tile(gt_tc[np.newaxis, :, :], (n, 1, 1))  # (n, m, 2)
+        dist_mat = np.linalg.norm(pred_tc_tile - gt_tc_tile, axis=2)  # (n, m)
+        xy_text_assign = np.argmin(dist_mat, axis=1) + 1  # (n,)

        instance_label_map[xy_text[:, 1], xy_text[:, 0]] = xy_text_assign
        return instance_count, instance_label_map
@@ -169,26 +180,47 @@ class SASTPostProcess(object):
        """
        Estimate sample points number.
        """
-        eh = (np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] - quad[2])) / 2.0
-        ew = (np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[2] - quad[3])) / 2.0
+        eh = (np.linalg.norm(quad[0] - quad[3]) +
+              np.linalg.norm(quad[1] - quad[2])) / 2.0
+        ew = (np.linalg.norm(quad[0] - quad[1]) +
+              np.linalg.norm(quad[2] - quad[3])) / 2.0

        dense_sample_pts_num = max(2, int(ew))
-        dense_xy_center_line = xy_text[np.linspace(0, xy_text.shape[0] - 1, dense_sample_pts_num,
-                                                endpoint=True, dtype=np.float32).astype(np.int32)]
-
-        dense_xy_center_line_diff = dense_xy_center_line[1:] - dense_xy_center_line[:-1]
-        estimate_arc_len = np.sum(np.linalg.norm(dense_xy_center_line_diff, axis=1))
+        dense_xy_center_line = xy_text[np.linspace(
+            0,
+            xy_text.shape[0] - 1,
+            dense_sample_pts_num,
+            endpoint=True,
+            dtype=np.float32).astype(np.int32)]
+
+        dense_xy_center_line_diff = dense_xy_center_line[
+            1:] - dense_xy_center_line[:-1]
+        estimate_arc_len = np.sum(
+            np.linalg.norm(
+                dense_xy_center_line_diff, axis=1))

        sample_pts_num = max(2, int(estimate_arc_len / eh))
        return sample_pts_num

-    def detect_sast(self, tcl_map, tvo_map, tbo_map, tco_map, ratio_w, ratio_h, src_w, src_h, 
-                shrink_ratio_of_width=0.3, tcl_map_thresh=0.5, offset_expand=1.0, out_strid=4.0):
+    def detect_sast(self,
+                    tcl_map,
+                    tvo_map,
+                    tbo_map,
+                    tco_map,
+                    ratio_w,
+                    ratio_h,
+                    src_w,
+                    src_h,
+                    shrink_ratio_of_width=0.3,
+                    tcl_map_thresh=0.5,
+                    offset_expand=1.0,
+                    out_strid=4.0):
        """
        first resize the tcl_map, tvo_map and tbo_map to the input_size, then restore the polys
        """
        # restore quad
-        scores, quads, xy_text = self.restore_quad(tcl_map, tcl_map_thresh, tvo_map)
+        scores, quads, xy_text = self.restore_quad(tcl_map, tcl_map_thresh,
+                                                   tvo_map)
        dets = np.hstack((quads, scores)).astype(np.float32, copy=False)
        dets = self.nms(dets)
        if dets.shape[0] == 0:
@@ -202,7 +234,8 @@ class SASTPostProcess(object):

        # instance segmentation
        # instance_count, instance_label_map = cv2.connectedComponents(tcl_map.astype(np.uint8), connectivity=8)
-        instance_count, instance_label_map = self.cluster_by_quads_tco(tcl_map, tcl_map_thresh, quads, tco_map)
+        instance_count, instance_label_map = self.cluster_by_quads_tco(
+            tcl_map, tcl_map_thresh, quads, tco_map)

        # restore single poly with tcl instance.
        poly_list = []
@@ -212,10 +245,10 @@ class SASTPostProcess(object):
            q_area = quad_areas[instance_idx - 1]
            if q_area < 5:
                continue
-            
+
            #
-            len1 = float(np.linalg.norm(quad[0] -quad[1]))
-            len2 = float(np.linalg.norm(quad[1] -quad[2]))
+            len1 = float(np.linalg.norm(quad[0] - quad[1]))
+            len2 = float(np.linalg.norm(quad[1] - quad[2]))
            min_len = min(len1, len2)
            if min_len < 3:
                continue
@@ -225,16 +258,18 @@ class SASTPostProcess(object):
                continue

            # filter low confidence instance
-            xy_text_scores = tcl_map[xy_text[:, 1], xy_text[:, 0], 0] 
+            xy_text_scores = tcl_map[xy_text[:, 1], xy_text[:, 0], 0]
            if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.1:
-            # if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.05:
+                # if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.05:
                continue

            # sort xy_text
-            left_center_pt = np.array([[(quad[0, 0] + quad[-1, 0]) / 2.0,
-                                        (quad[0, 1] + quad[-1, 1]) / 2.0]]) # (1, 2)
-            right_center_pt = np.array([[(quad[1, 0] + quad[2, 0]) / 2.0,
-                                        (quad[1, 1] + quad[2, 1]) / 2.0]]) # (1, 2)
+            left_center_pt = np.array(
+                [[(quad[0, 0] + quad[-1, 0]) / 2.0,
+                  (quad[0, 1] + quad[-1, 1]) / 2.0]])  # (1, 2)
+            right_center_pt = np.array(
+                [[(quad[1, 0] + quad[2, 0]) / 2.0,
+                  (quad[1, 1] + quad[2, 1]) / 2.0]])  # (1, 2)
            proj_unit_vec = (right_center_pt - left_center_pt) / \
                            (np.linalg.norm(right_center_pt - left_center_pt) + 1e-6)
            proj_value = np.sum(xy_text * proj_unit_vec, axis=1)
@@ -245,33 +280,45 @@ class SASTPostProcess(object):
                sample_pts_num = self.estimate_sample_pts_num(quad, xy_text)
            else:
                sample_pts_num = self.sample_pts_num
-            xy_center_line = xy_text[np.linspace(0, xy_text.shape[0] - 1, sample_pts_num,
-                                                endpoint=True, dtype=np.float32).astype(np.int32)]
+            xy_center_line = xy_text[np.linspace(
+                0,
+                xy_text.shape[0] - 1,
+                sample_pts_num,
+                endpoint=True,
+                dtype=np.float32).astype(np.int32)]

            point_pair_list = []
            for x, y in xy_center_line:
                # get corresponding offset
                offset = tbo_map[y, x, :].reshape(2, 2)
                if offset_expand != 1.0:
-                    offset_length = np.linalg.norm(offset, axis=1, keepdims=True)
-                    expand_length = np.clip(offset_length * (offset_expand - 1), a_min=0.5, a_max=3.0)
+                    offset_length = np.linalg.norm(
+                        offset, axis=1, keepdims=True)
+                    expand_length = np.clip(
+                        offset_length * (offset_expand - 1),
+                        a_min=0.5,
+                        a_max=3.0)
                    offset_detal = offset / offset_length * expand_length
-                    offset = offset + offset_detal                
-                # original point
+                    offset = offset + offset_detal
+                    # original point
                ori_yx = np.array([y, x], dtype=np.float32)
-                point_pair = (ori_yx +  offset)[:, ::-1]* out_strid / np.array([ratio_w, ratio_h]).reshape(-1, 2) 
+                point_pair = (ori_yx + offset)[:, ::-1] * out_strid / np.array(
+                    [ratio_w, ratio_h]).reshape(-1, 2)
                point_pair_list.append(point_pair)

            # ndarry: (x, 2), expand poly along width
            detected_poly = self.point_pair2poly(point_pair_list)
-            detected_poly = self.expand_poly_along_width(detected_poly, shrink_ratio_of_width)
-            detected_poly[:, 0] = np.clip(detected_poly[:, 0], a_min=0, a_max=src_w)
-            detected_poly[:, 1] = np.clip(detected_poly[:, 1], a_min=0, a_max=src_h)
+            detected_poly = self.expand_poly_along_width(detected_poly,
+                                                         shrink_ratio_of_width)
+            detected_poly[:, 0] = np.clip(
+                detected_poly[:, 0], a_min=0, a_max=src_w)
+            detected_poly[:, 1] = np.clip(
+                detected_poly[:, 1], a_min=0, a_max=src_h)
            poly_list.append(detected_poly)

        return poly_list

-    def __call__(self, outs_dict, shape_list):                
+    def __call__(self, outs_dict, shape_list):
        score_list = outs_dict['f_score']
        border_list = outs_dict['f_border']
        tvo_list = outs_dict['f_tvo']
@@ -281,20 +328,28 @@ class SASTPostProcess(object):
            border_list = border_list.numpy()
            tvo_list = tvo_list.numpy()
            tco_list = tco_list.numpy()
-                    
+
        img_num = len(shape_list)
        poly_lists = []
        for ino in range(img_num):
-            p_score = score_list[ino].transpose((1,2,0))
-            p_border = border_list[ino].transpose((1,2,0))
-            p_tvo = tvo_list[ino].transpose((1,2,0))
-            p_tco = tco_list[ino].transpose((1,2,0))
+            p_score = score_list[ino].transpose((1, 2, 0))
+            p_border = border_list[ino].transpose((1, 2, 0))
+            p_tvo = tvo_list[ino].transpose((1, 2, 0))
+            p_tco = tco_list[ino].transpose((1, 2, 0))
            src_h, src_w, ratio_h, ratio_w = shape_list[ino]

-            poly_list = self.detect_sast(p_score, p_tvo, p_border, p_tco, ratio_w, ratio_h, src_w, src_h, 
-                                         shrink_ratio_of_width=self.shrink_ratio_of_width, 
-                                         tcl_map_thresh=self.tcl_map_thresh, offset_expand=self.expand_scale)
+            poly_list = self.detect_sast(
+                p_score,
+                p_tvo,
+                p_border,
+                p_tco,
+                ratio_w,
+                ratio_h,
+                src_w,
+                src_h,
+                shrink_ratio_of_width=self.shrink_ratio_of_width,
+                tcl_map_thresh=self.tcl_map_thresh,
+                offset_expand=self.expand_scale)
            poly_lists.append({'points': np.array(poly_list)})

        return poly_lists
-
--- a/ppocr/utils/dict/arabic_dict.txt
+++ b/ppocr/utils/dict/arabic_dict.txt
+ 
+!
+#
+$
+%
+&
+'
+(
+
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+?
+@
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+_
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+É
+é
+ء
+آ
+أ
+ؤ
+إ
+ئ
+ا
+ب
+ة
+ت
+ث
+ج
+ح
+خ
+د
+ذ
+ر
+ز
+س
+ش
+ص
+ض
+ط
+ظ
+ع
+غ
+ف
+ق
+ك
+ل
+م
+ن
+ه
+و
+ى
+ي
+ً
+ٌ
+ٍ
+َ
+ُ
+ِ
+ّ
+ْ
+ٓ
+ٔ
+ٰ
+ٱ
+ٹ
+پ
+چ
+ڈ
+ڑ
+ژ
+ک
+ڭ
+گ
+ں
+ھ
+ۀ
+ہ
+ۂ
+ۃ
+ۆ
+ۇ
+ۈ
+ۋ
+ی
+ې
+ے
+ۓ
+ە
+١
+٢
+٣
+٤
+٥
+٦
+٧
+٨
+٩
--- a/ppocr/utils/dict/cyrillic_dict.txt
+++ b/ppocr/utils/dict/cyrillic_dict.txt
+ 
+!
+#
+$
+%
+&
+'
+(
+
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+?
+@
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+_
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+É
+é
+Ё
+Є
+І
+Ј
+Љ
+Ў
+А
+Б
+В
+Г
+Д
+Е
+Ж
+З
+И
+Й
+К
+Л
+М
+Н
+О
+П
+Р
+С
+Т
+У
+Ф
+Х
+Ц
+Ч
+Ш
+Щ
+Ъ
+Ы
+Ь
+Э
+Ю
+Я
+а
+б
+в
+г
+д
+е
+ж
+з
+и
+й
+к
+л
+м
+н
+о
+п
+р
+с
+т
+у
+ф
+х
+ц
+ч
+ш
+щ
+ъ
+ы
+ь
+э
+ю
+я
+ё
+ђ
+є
+і
+ј
+љ
+њ
+ћ
+ў
+џ
+Ґ
+ґ
--- a/ppocr/utils/dict/devanagari_dict.txt
+++ b/ppocr/utils/dict/devanagari_dict.txt
+ 
+!
+#
+$
+%
+&
+'
+(
+
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+?
+@
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+_
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+É
+é
+ँ
+ं
+ः
+अ
+आ
+इ
+ई
+उ
+ऊ
+ऋ
+ए
+ऐ
+ऑ
+ओ
+औ
+क
+ख
+ग
+घ
+ङ
+च
+छ
+ज
+झ
+ञ
+ट
+ठ
+ड
+ढ
+ण
+त
+थ
+द
+ध
+न
+ऩ
+प
+फ
+ब
+भ
+म
+य
+र
+ऱ
+ल
+ळ
+व
+श
+ष
+स
+ह
+़
+ा
+ि
+ी
+ु
+ू
+ृ
+ॅ
+े
+ै
+ॉ
+ो
+ौ
+्
+॒
+क़
+ख़
+ग़
+ज़
+ड़
+ढ़
+फ़
+ॠ
+।
+०
+१
+२
+३
+४
+५
+६
+७
+८
+९
+॰