Merge remote-tracking branch 'origin/dygraph' into dy1

19eb7eb8 · Leif · 0afe6c32 · 03b7daa5 · 19eb7eb8 · 19eb7eb8
Commit 19eb7eb8 authored Sep 03, 2021 by Leif
20 changed files
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -19,6 +19,7 @@ from __future__ import unicode_literals
 import numpy as np
 import string
+import json
 class ClsLabelEncode(object):
@@ -39,7 +40,6 @@ class DetLabelEncode(object):
        pass
    def __call__(self, data):
-        import json
        label = data['label']
        label = json.loads(label)
        nBox = len(label)
@@ -53,6 +53,8 @@ class DetLabelEncode(object):
                txt_tags.append(True)
            else:
                txt_tags.append(False)
+        if len(boxes) == 0:
+            return None
        boxes = self.expand_points_num(boxes)
        boxes = np.array(boxes, dtype=np.float32)
        txt_tags = np.array(txt_tags, dtype=np.bool)
@@ -96,7 +98,7 @@ class BaseRecLabelEncode(object):
            'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
            'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
            'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
-            'mr', 'ne'
+            'mr', 'ne', 'latin', 'arabic', 'cyrillic', 'devanagari'
        ]
        assert character_type in support_character_type, "Only {} are supported now but get {}".format(
            support_character_type, character_type)
@@ -159,6 +161,34 @@ class BaseRecLabelEncode(object):
        return text_list
+class NRTRLabelEncode(BaseRecLabelEncode):
+    """ Convert between text-label and text-index.
+    The encoded text is wrapped with the indices 2 and 3 and right-padded
+    with 0 up to ``max_text_len``; ``add_special_char`` places 'blank',
+    '<unk>', '<s>' and '</s>' at the head of the character list
+    (presumably giving them indices 0-3 -- confirm against the base class).
+    """
+    def __init__(self,
+                 max_text_length,
+                 character_dict_path=None,
+                 character_type='EN_symbol',
+                 use_space_char=False,
+                 **kwargs):
+        super(NRTRLabelEncode,
+              self).__init__(max_text_length, character_dict_path,
+                             character_type, use_space_char)
+    def __call__(self, data):
+        """Encode ``data['label']`` in place.
+        Returns ``data`` with 'label' replaced by the padded index array and
+        'length' set to the number of encoded characters (start/end tokens
+        excluded), or None when the text cannot be encoded or does not fit.
+        """
+        text = data['label']
+        text = self.encode(text)
+        if text is None:
+            return None
+        # Two slots are reserved for the start/end tokens. A longer text would
+        # produce a label longer than max_text_len (the padding count below
+        # would be negative), which breaks fixed-size batching.
+        if len(text) > self.max_text_len - 2:
+            return None
+        data['length'] = np.array(len(text))
+        text.insert(0, 2)
+        text.append(3)
+        text = text + [0] * (self.max_text_len - len(text))
+        data['label'] = np.array(text)
+        return data
+    def add_special_char(self, dict_character):
+        """Prepend the special tokens to the character list."""
+        dict_character = ['blank','<unk>','<s>','</s>'] + dict_character
+        return dict_character
 class CTCLabelEncode(BaseRecLabelEncode):
    """ Convert between text-label and text-index """
@@ -187,6 +217,78 @@ class CTCLabelEncode(BaseRecLabelEncode):
        return dict_character
+class E2ELabelEncodeTest(BaseRecLabelEncode):
+    """ Parse a JSON detection label and encode every transcription to indices """
+    def __init__(self,
+                 max_text_length,
+                 character_dict_path=None,
+                 character_type='EN',
+                 use_space_char=False,
+                 **kwargs):
+        super(E2ELabelEncodeTest,
+              self).__init__(max_text_length, character_dict_path,
+                             character_type, use_space_char)
+    def __call__(self, data):
+        """Fill ``data`` with 'polys', 'ignore_tags' and 'texts'.
+        ``data['label']`` is a JSON list of objects with 'points' and
+        'transcription'. Returns None as soon as one transcription cannot
+        be encoded ('polys' and 'ignore_tags' are already set at that point).
+        """
+        import json
+        # Index one past the last dictionary entry is used as padding value.
+        padnum = len(self.dict)
+        label = json.loads(data['label'])
+        boxes = [item['points'] for item in label]
+        txts = [item['transcription'] for item in label]
+        # '*' and '###' mark text regions that must be ignored.
+        txt_tags = [txt in ['*', '###'] for txt in txts]
+        boxes = np.array(boxes, dtype=np.float32)
+        # Builtin bool instead of the removed ``np.bool`` alias.
+        txt_tags = np.array(txt_tags, dtype=bool)
+        data['polys'] = boxes
+        data['ignore_tags'] = txt_tags
+        temp_texts = []
+        for text in txts:
+            text = text.lower()
+            text = self.encode(text)
+            if text is None:
+                return None
+            # Right-pad with padnum up to max_text_len.
+            text = text + [padnum] * (self.max_text_len - len(text))
+            temp_texts.append(text)
+        data['texts'] = np.array(temp_texts)
+        return data
+class E2ELabelEncodeTrain(object):
+    """ Parse a JSON detection label into polygons, raw texts and ignore flags """
+    def __init__(self, **kwargs):
+        pass
+    def __call__(self, data):
+        """Fill ``data`` with 'polys', 'texts' and 'ignore_tags'.
+        ``data['label']`` is a JSON list of objects with 'points' and
+        'transcription'; transcriptions are kept as plain strings.
+        """
+        import json
+        label = json.loads(data['label'])
+        boxes = [item['points'] for item in label]
+        txts = [item['transcription'] for item in label]
+        # '*' and '###' mark text regions that must be ignored.
+        txt_tags = [txt in ['*', '###'] for txt in txts]
+        data['polys'] = np.array(boxes, dtype=np.float32)
+        data['texts'] = txts
+        # Builtin bool instead of the removed ``np.bool`` alias.
+        data['ignore_tags'] = np.array(txt_tags, dtype=bool)
+        return data
 class AttnLabelEncode(BaseRecLabelEncode):
    """ Convert between text-label and text-index """
@@ -279,3 +381,171 @@ class SRNLabelEncode(BaseRecLabelEncode):
            assert False, "Unsupport type %s in get_beg_end_flag_idx" \
                          % beg_or_end
        return idx
+class TableLabelEncode(object):
+    """ Convert between text-label and text-index for table structure labels """
+    def __init__(self,
+                 max_text_length,
+                 max_elem_length,
+                 max_cell_num,
+                 character_dict_path,
+                 span_weight=1.0,
+                 **kwargs):
+        self.max_text_length = max_text_length
+        self.max_elem_length = max_elem_length
+        self.max_cell_num = max_cell_num
+        list_character, list_elem = self.load_char_elem_dict(
+            character_dict_path)
+        list_character = self.add_special_char(list_character)
+        list_elem = self.add_special_char(list_elem)
+        # token -> index lookups; 'sos' is index 0 and 'eos' is the last index
+        self.dict_character = {
+            char: i
+            for i, char in enumerate(list_character)
+        }
+        self.dict_elem = {elem: i for i, elem in enumerate(list_elem)}
+        self.span_weight = span_weight
+    def load_char_elem_dict(self, character_dict_path):
+        """Read the character list and the structure-element list.
+        The first line of the file holds "<character_num>\\t<elem_num>"; it is
+        followed by that many character lines and then that many element lines.
+        """
+        with open(character_dict_path, "rb") as fin:
+            lines = fin.readlines()
+        substr = lines[0].decode('utf-8').strip("\r\n").split("\t")
+        character_num = int(substr[0])
+        elem_num = int(substr[1])
+        elem_start = 1 + character_num
+        list_character = [
+            lines[cno].decode('utf-8').strip("\r\n")
+            for cno in range(1, elem_start)
+        ]
+        list_elem = [
+            lines[eno].decode('utf-8').strip("\r\n")
+            for eno in range(elem_start, elem_start + elem_num)
+        ]
+        return list_character, list_elem
+    def add_special_char(self, list_character):
+        """Wrap the list with the begin ("sos") and end ("eos") tokens."""
+        self.beg_str = "sos"
+        self.end_str = "eos"
+        list_character = [self.beg_str] + list_character + [self.end_str]
+        return list_character
+    def get_span_idx_list(self):
+        """Return the indices of all structure elements whose name contains 'span'."""
+        return [idx for elem, idx in self.dict_elem.items() if 'span' in elem]
+    def __call__(self, data):
+        """Encode the table structure and the cell boxes of one sample.
+        Reads data['cells'], data['structure']['tokens'] and data['image'];
+        writes 'structure', 'bbox_list', 'bbox_list_mask', 'structure_mask'
+        and 'sp_tokens'. Returns None when the structure cannot be encoded.
+        """
+        cells = data['cells']
+        structure = data['structure']['tokens']
+        structure = self.encode(structure, 'elem')
+        if structure is None:
+            return None
+        elem_num = len(structure)
+        # sos + tokens + eos, then right-padded with 0 to max_elem_length + 2
+        structure = [0] + structure + [len(self.dict_elem) - 1]
+        structure = structure + [0] * (self.max_elem_length + 2 - len(structure)
+                                       )
+        structure = np.array(structure)
+        data['structure'] = structure
+        elem_char_idx1 = self.dict_elem['<td>']
+        elem_char_idx2 = self.dict_elem['<td']
+        span_idx_list = self.get_span_idx_list()
+        # positions in the padded sequence that open a table cell
+        td_idx_list = np.logical_or(structure == elem_char_idx1,
+                                    structure == elem_char_idx2)
+        td_idx_list = np.where(td_idx_list)[0]
+        structure_mask = np.ones(
+            (self.max_elem_length + 2, 1), dtype=np.float32)
+        bbox_list = np.zeros((self.max_elem_length + 2, 4), dtype=np.float32)
+        bbox_list_mask = np.zeros(
+            (self.max_elem_length + 2, 1), dtype=np.float32)
+        img_height, img_width, img_ch = data['image'].shape
+        # Always bound; it is only read when a span token follows a cell,
+        # which requires span_idx_list to be non-empty.
+        span_weight = 1.0
+        if len(span_idx_list) > 0:
+            span_weight = len(td_idx_list) * 1.0 / len(span_idx_list)
+            span_weight = min(max(span_weight, 1.0), self.span_weight)
+        for cno, cell in enumerate(cells):
+            if 'bbox' in cell:
+                # normalise the box by the image size (indices 0/2 by width,
+                # 1/3 by height)
+                bbox = cell['bbox'].copy()
+                bbox[0] = bbox[0] * 1.0 / img_width
+                bbox[1] = bbox[1] * 1.0 / img_height
+                bbox[2] = bbox[2] * 1.0 / img_width
+                bbox[3] = bbox[3] * 1.0 / img_height
+                td_idx = td_idx_list[cno]
+                bbox_list[td_idx] = bbox
+                bbox_list_mask[td_idx] = 1.0
+                # a span attribute directly after the cell token gets a
+                # dedicated weight in the structure mask
+                cand_span_idx = td_idx + 1
+                if cand_span_idx < (self.max_elem_length + 2):
+                    if structure[cand_span_idx] in span_idx_list:
+                        structure_mask[cand_span_idx] = span_weight
+        data['bbox_list'] = bbox_list
+        data['bbox_list_mask'] = bbox_list_mask
+        data['structure_mask'] = structure_mask
+        char_beg_idx = self.get_beg_end_flag_idx('beg', 'char')
+        char_end_idx = self.get_beg_end_flag_idx('end', 'char')
+        elem_beg_idx = self.get_beg_end_flag_idx('beg', 'elem')
+        elem_end_idx = self.get_beg_end_flag_idx('end', 'elem')
+        data['sp_tokens'] = np.array([
+            char_beg_idx, char_end_idx, elem_beg_idx, elem_end_idx,
+            elem_char_idx1, elem_char_idx2, self.max_text_length,
+            self.max_elem_length, self.max_cell_num, elem_num
+        ])
+        return data
+    def encode(self, text, char_or_elem):
+        """convert text-label into text-index.
+        Returns None when the input is longer than the configured maximum,
+        contains a token missing from the dictionary, or is an empty element
+        sequence. An empty character sequence maps to the 'space' index.
+        """
+        if char_or_elem == "char":
+            max_len = self.max_text_length
+            current_dict = self.dict_character
+        else:
+            max_len = self.max_elem_length
+            current_dict = self.dict_elem
+        if len(text) > max_len:
+            return None
+        if len(text) == 0:
+            if char_or_elem == "char":
+                return [self.dict_character['space']]
+            return None
+        text_list = []
+        for char in text:
+            if char not in current_dict:
+                return None
+            text_list.append(current_dict[char])
+        # text is non-empty here and every token was found, so text_list
+        # cannot be empty; no second empty-check is needed.
+        return text_list
+    def get_ignored_tokens(self, char_or_elem):
+        """Return [begin index, end index] for the chosen dictionary."""
+        beg_idx = self.get_beg_end_flag_idx("beg", char_or_elem)
+        end_idx = self.get_beg_end_flag_idx("end", char_or_elem)
+        return [beg_idx, end_idx]
+    def get_beg_end_flag_idx(self, beg_or_end, char_or_elem):
+        """Return the index of the begin/end token as a 0-d numpy array."""
+        if char_or_elem == "char":
+            if beg_or_end == "beg":
+                idx = np.array(self.dict_character[self.beg_str])
+            elif beg_or_end == "end":
+                idx = np.array(self.dict_character[self.end_str])
+            else:
+                assert False, "Unsupport type %s in get_beg_end_flag_idx of char" \
+                              % beg_or_end
+        elif char_or_elem == "elem":
+            if beg_or_end == "beg":
+                idx = np.array(self.dict_elem[self.beg_str])
+            elif beg_or_end == "end":
+                idx = np.array(self.dict_elem[self.end_str])
+            else:
+                assert False, "Unsupport type %s in get_beg_end_flag_idx of elem" \
+                              % beg_or_end
+        else:
+            assert False, "Unsupport type %s in char_or_elem" \
+                              % char_or_elem
+        return idx
--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
@@ -57,6 +57,38 @@ class DecodeImage(object):
        return data
+class NRTRDecodeImage(object):
+    """ decode encoded image bytes into a single-channel grayscale image """
+    def __init__(self, img_mode='RGB', channel_first=False, **kwargs):
+        self.img_mode = img_mode
+        self.channel_first = channel_first
+    def __call__(self, data):
+        """Replace ``data['image']`` (encoded bytes) with the decoded image.
+        Returns None when the buffer cannot be decoded. The result is 2-D
+        (H, W), or (1, H, W) when ``channel_first`` is set.
+        """
+        img = data['image']
+        if six.PY2:
+            assert type(img) is str and len(
+                img) > 0, "invalid input 'img' in NRTRDecodeImage"
+        else:
+            assert type(img) is bytes and len(
+                img) > 0, "invalid input 'img' in NRTRDecodeImage"
+        img = np.frombuffer(img, dtype='uint8')
+        img = cv2.imdecode(img, 1)
+        if img is None:
+            return None
+        if self.img_mode == 'GRAY':
+            # imdecode(..., 1) yields a 3-channel image, on which GRAY2BGR
+            # fails; only expand when the decoded image is single-channel.
+            if img.ndim == 2:
+                img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+        elif self.img_mode == 'RGB':
+            assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape)
+            img = img[:, :, ::-1]
+        # NOTE(review): in 'RGB' mode the channels were just reversed, yet the
+        # BGR weights are applied here; kept as-is because changing it would
+        # alter the preprocessing existing models were trained with -- confirm.
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        if self.channel_first:
+            # The grayscale image is 2-D, so transpose((2, 0, 1)) always
+            # raised; add the channel axis in front instead.
+            img = img[np.newaxis, :, :]
+        data['image'] = img
+        return data
 class NormalizeImage(object):
    """ normalize image such as substract mean, divide std
    """
@@ -81,7 +113,7 @@ class NormalizeImage(object):
        assert isinstance(img,
                          np.ndarray), "invalid input 'img' in NormalizeImage"
        data['image'] = (
-            img.astype('float32') * self.scale - self.mean) / self.std
+                                img.astype('float32') * self.scale - self.mean) / self.std
        return data
@@ -163,7 +195,7 @@ class DetResizeForTest(object):
            img, (ratio_h, ratio_w)
        """
        limit_side_len = self.limit_side_len
-        h, w, _ = img.shape
+        h, w, c = img.shape
        # limit the max side
        if self.limit_type == 'max':
@@ -174,7 +206,7 @@ class DetResizeForTest(object):
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
-        else:
+        elif self.limit_type == 'min':
            if min(h, w) < limit_side_len:
                if h < w:
                    ratio = float(limit_side_len) / h
@@ -182,6 +214,10 @@ class DetResizeForTest(object):
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
+        elif self.limit_type == 'resize_long':
+            ratio = float(limit_side_len) / max(h,w)
+        else:
+            raise Exception('not support limit type, image ')
        resize_h = int(h * ratio)
        resize_w = int(w * ratio)
@@ -197,7 +233,6 @@ class DetResizeForTest(object):
            sys.exit(0)
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
-        # return img, np.array([h, w])
        return img, [ratio_h, ratio_w]
    def resize_image_type2(self, img):
@@ -206,7 +241,6 @@ class DetResizeForTest(object):
        resize_w = w
        resize_h = h
-        # Fix the longer side
        if resize_h > resize_w:
            ratio = float(self.resize_long) / resize_h
        else:
@@ -223,3 +257,72 @@ class DetResizeForTest(object):
        ratio_w = resize_w / float(w)
        return img, [ratio_h, ratio_w]
+class E2EResizeForTest(object):
+    """ Resize an image for end-to-end evaluation; both sides become multiples of 128 """
+    def __init__(self, **kwargs):
+        super(E2EResizeForTest, self).__init__()
+        self.max_side_len = kwargs['max_side_len']
+        self.valid_set = kwargs['valid_set']
+    def __call__(self, data):
+        """Resize ``data['image']`` and store [src_h, src_w, ratio_h, ratio_w] in ``data['shape']``."""
+        img = data['image']
+        src_h, src_w, _ = img.shape
+        if self.valid_set == 'totaltext':
+            resize = self.resize_image_for_totaltext
+        else:
+            resize = self.resize_image
+        im_resized, (ratio_h, ratio_w) = resize(
+            img, max_side_len=self.max_side_len)
+        data['image'] = im_resized
+        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
+        return data
+    def _resize_with_ratio(self, im, ratio, max_stride=128):
+        """Scale ``im`` by ``ratio``, then round each side up to a multiple of ``max_stride``."""
+        h, w, _ = im.shape
+        resize_h = int(h * ratio)
+        resize_w = int(w * ratio)
+        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
+        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
+        im = cv2.resize(im, (int(resize_w), int(resize_h)))
+        ratio_h = resize_h / float(h)
+        ratio_w = resize_w / float(w)
+        return im, (ratio_h, ratio_w)
+    def resize_image_for_totaltext(self, im, max_side_len=512):
+        """
+        enlarge the image by 1.25, unless the enlarged height would exceed
+        max_side_len, in which case the height is scaled to max_side_len
+        :param im: the image to resize
+        :param max_side_len: limit applied to the image height
+        :return: the resized image and the (ratio_h, ratio_w) resize ratios
+        """
+        h, w, _ = im.shape
+        ratio = 1.25
+        if h * ratio > max_side_len:
+            ratio = float(max_side_len) / h
+        return self._resize_with_ratio(im, ratio)
+    def resize_image(self, im, max_side_len=512):
+        """
+        resize image to a size multiple of max_stride which is required by the network
+        :param im: the image to resize
+        :param max_side_len: limit of max image size to avoid out of memory in gpu
+        :return: the resized image and the resize ratio
+        """
+        h, w, _ = im.shape
+        # Fix the longer side
+        if h > w:
+            ratio = float(max_side_len) / h
+        else:
+            ratio = float(max_side_len) / w
+        return self._resize_with_ratio(im, ratio)
--- a/ppocr/data/imaug/pg_process.py
+++ b/ppocr/data/imaug/pg_process.py
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import cv2
+import numpy as np
+__all__ = ['PGProcessTrain']
+class PGProcessTrain(object):
+    def __init__(self,
+                 character_dict_path,
+                 max_text_length,
+                 max_text_nums,
+                 tcl_len,
+                 batch_size=14,
+                 min_crop_size=24,
+                 min_text_size=4,
+                 max_text_size=512,
+                 **kwargs):
+        """Store the configuration and load the character table from ``character_dict_path``."""
+        # character table; its length is used as the padding index
+        self.Lexicon_Table = self.get_dict(character_dict_path)
+        self.pad_num = len(self.Lexicon_Table)
+        # label geometry limits
+        self.max_text_length = max_text_length
+        self.max_text_nums = max_text_nums
+        self.tcl_len = tcl_len
+        # batching / cropping / text-size thresholds
+        self.batch_size = batch_size
+        self.min_crop_size = min_crop_size
+        self.min_text_size = min_text_size
+        self.max_text_size = max_text_size
+        self.img_id = 0
+    def get_dict(self, character_dict_path):
+        """Return every character of the dictionary file as a list.
+        The file is read as UTF-8; line breaks are stripped from both ends of
+        each line and the remaining characters are concatenated in file order.
+        """
+        with open(character_dict_path, "rb") as fin:
+            # strip("\r\n") removes any mix of CR/LF at both ends, which is
+            # what the former strip("\n").strip("\r\n") chain amounted to
+            pieces = [line.decode('utf-8').strip("\r\n") for line in fin]
+        return list("".join(pieces))
+    def quad_area(self, poly):
+        """
+        compute the signed area of a quadrilateral
+        :param poly: four (x, y) points; the sign of the result depends on
+            the order in which the points are listed
+        :return: half the sum of (x_next - x) * (y_next + y) over the 4 edges
+        """
+        edge = []
+        for cur in range(4):
+            nxt = (cur + 1) % 4
+            dx = poly[nxt][0] - poly[cur][0]
+            sy = poly[nxt][1] + poly[cur][1]
+            edge.append(dx * sy)
+        return np.sum(edge) / 2.
+    def gen_quad_from_poly(self, poly):
+        """
+        Generate min area quad from poly.
+        The four corners of the minimum-area rectangle are rotated so that
+        they line up as closely as possible with the first point, the two
+        middle points and the last point of ``poly``.
+        """
+        n_pts = poly.shape[0]
+        # (center (x,y), (width, height), angle of rotation)
+        rect = cv2.minAreaRect(poly.astype(np.int32))
+        corners = np.array(cv2.boxPoints(rect))
+        head, mid_a = poly[0], poly[n_pts // 2 - 1]
+        mid_b, tail = poly[n_pts // 2], poly[-1]
+        best_start = 0
+        best_dist = 1e4
+        for start in range(4):
+            dist = np.linalg.norm(corners[start % 4] - head) + \
+                   np.linalg.norm(corners[(start + 1) % 4] - mid_a) + \
+                   np.linalg.norm(corners[(start + 2) % 4] - mid_b) + \
+                   np.linalg.norm(corners[(start + 3) % 4] - tail)
+            if dist < best_dist:
+                best_dist, best_start = dist, start
+        # row i of the result is corners[(best_start + i) % 4]
+        return np.roll(corners, -best_start, axis=0).astype(np.float32)
+    def check_and_validate_polys(self, polys, tags, im_size):
+        """
+        check so that the text poly is in the same direction,
+        and also filter some invalid polygons
+        :param polys: polygon array indexed as [poly, point, xy]; it is
+            clipped to the image IN PLACE
+        :param tags: one ignore flag per polygon
+        :param im_size: (height, width) of the image
+        :return: validated polys, their tags (forced to True for polygons
+            that had to be reversed) and a per-polygon flag that is 0 when
+            the fitted quad is more than twice as tall as it is wide, else 1
+        """
+        (h, w) = im_size
+        if polys.shape[0] == 0:
+            return polys, np.array([]), np.array([])
+        polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1)
+        polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1)
+        validated_polys = []
+        validated_tags = []
+        hv_tags = []
+        for poly, tag in zip(polys, tags):
+            quad = self.gen_quad_from_poly(poly)
+            p_area = self.quad_area(quad)
+            if abs(p_area) < 1:
+                print('invalid poly')
+                continue
+            if p_area > 0:
+                if not tag:
+                    print('poly in wrong direction')
+                    tag = True  # reversed cases should be ignore
+                # keep the first point and reverse the rest; equals the former
+                # fixed index (0, 15, 14, ..., 1) for 16-point polygons and
+                # also works for any other point count
+                poly = np.concatenate((poly[:1], poly[:0:-1]), axis=0)
+                quad = quad[(0, 3, 2, 1), :]
+            len_w = np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[3] -
+                                                                       quad[2])
+            len_h = np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] -
+                                                                       quad[2])
+            hv_tag = 1
+            if len_w * 2.0 < len_h:
+                hv_tag = 0
+            validated_polys.append(poly)
+            validated_tags.append(tag)
+            hv_tags.append(hv_tag)
+        return np.array(validated_polys), np.array(validated_tags), np.array(
+            hv_tags)
+    def crop_area(self,
+                  im,
+                  polys,
+                  tags,
+                  hv_tags,
+                  txts,
+                  crop_background=False,
+                  max_tries=25):
+        """
+        make random crop from the input image
+        :param im: image indexed as [row, col, channel]
+        :param polys:  [b,4,2]
+        :param tags: per-polygon flags, indexed together with polys
+        :param hv_tags: per-polygon flags, indexed together with polys
+        :param txts: per-polygon texts, indexed together with polys
+        :param crop_background: when True, a crop that contains no selected
+            polygon is returned (with empty polys/tags/hv_tags/txts)
+        :param max_tries: number of random crops attempted before giving up
+        :return: (im, polys, tags, hv_tags, txts) for the crop, or the inputs
+            unchanged when no acceptable crop was found
+        """
+        h, w, _ = im.shape
+        pad_h = h // 10
+        pad_w = w // 10
+        # occupancy of rows/columns (with padding on both sides): 1 where the
+        # extent of some polygon lies, 0 elsewhere
+        h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
+        w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
+        for poly in polys:
+            poly = np.round(poly, decimals=0).astype(np.int32)
+            minx = np.min(poly[:, 0])
+            maxx = np.max(poly[:, 0])
+            w_array[minx + pad_w:maxx + pad_w] = 1
+            miny = np.min(poly[:, 1])
+            maxy = np.max(poly[:, 1])
+            h_array[miny + pad_h:maxy + pad_h] = 1
+        # ensure the cropped area not across a text
+        h_axis = np.where(h_array == 0)[0]
+        w_axis = np.where(w_array == 0)[0]
+        if len(h_axis) == 0 or len(w_axis) == 0:
+            return im, polys, tags, hv_tags, txts
+        for i in range(max_tries):
+            # two random free columns / rows give the crop borders; the
+            # padding offset is removed and the result clipped to the image
+            xx = np.random.choice(w_axis, size=2)
+            xmin = np.min(xx) - pad_w
+            xmax = np.max(xx) - pad_w
+            xmin = np.clip(xmin, 0, w - 1)
+            xmax = np.clip(xmax, 0, w - 1)
+            yy = np.random.choice(h_axis, size=2)
+            ymin = np.min(yy) - pad_h
+            ymax = np.max(yy) - pad_h
+            ymin = np.clip(ymin, 0, h - 1)
+            ymax = np.clip(ymax, 0, h - 1)
+            if xmax - xmin < self.min_crop_size or \
+                    ymax - ymin < self.min_crop_size:
+                continue
+            if polys.shape[0] != 0:
+                poly_axis_in_area = (polys[:, :, 0] >= xmin) & (polys[:, :, 0] <= xmax) \
+                                    & (polys[:, :, 1] >= ymin) & (polys[:, :, 1] <= ymax)
+                # NOTE(review): a polygon is selected when exactly 4 of its
+                # points are inside the crop; for polygons with more than 4
+                # points this is not "all points inside" -- confirm intent.
+                selected_polys = np.where(
+                    np.sum(poly_axis_in_area, axis=1) == 4)[0]
+            else:
+                selected_polys = []
+            if len(selected_polys) == 0:
+                # no text in this area
+                if crop_background:
+                    # selected_polys is empty here, so txts becomes []
+                    txts_tmp = []
+                    for selected_poly in selected_polys:
+                        txts_tmp.append(txts[selected_poly])
+                    txts = txts_tmp
+                    return im[ymin: ymax + 1, xmin: xmax + 1, :], \
+                           polys[selected_polys], tags[selected_polys], hv_tags[selected_polys], txts
+                else:
+                    continue
+            im = im[ymin:ymax + 1, xmin:xmax + 1, :]
+            polys = polys[selected_polys]
+            tags = tags[selected_polys]
+            hv_tags = hv_tags[selected_polys]
+            txts_tmp = []
+            for selected_poly in selected_polys:
+                txts_tmp.append(txts[selected_poly])
+            txts = txts_tmp
+            # move the kept polygons into the coordinate frame of the crop
+            polys[:, :, 0] -= xmin
+            polys[:, :, 1] -= ymin
+            return im, polys, tags, hv_tags, txts
+        return im, polys, tags, hv_tags, txts
+    def fit_and_gather_tcl_points_v2(self,
+                                     min_area_quad,
+                                     poly,
+                                     max_h,
+                                     max_w,
+                                     fixed_point_num=64,
+                                     img_id=0,
+                                     reference_height=3):
+        """
+        Find the center point of poly as key_points, then fit and gather.
+        The key points are joined into a polyline that is rasterised on a
+        (max_h, max_w) canvas; the covered pixels are ordered along a
+        direction derived from min_area_quad and reduced to at most
+        fixed_point_num entries.
+        :return: pos_l, an int32 array of shape (tcl_len, 3) holding
+            (img_id, y, x) per row, and pos_m, a float32 (tcl_len, 1) mask
+            that is 1.0 for the rows filled with real points
+        """
+        key_point_xys = []
+        point_num = poly.shape[0]
+        # midpoint of point idx and its mirror point point_num - 1 - idx
+        for idx in range(point_num // 2):
+            center_point = (poly[idx] + poly[point_num - 1 - idx]) / 2.0
+            key_point_xys.append(center_point)
+        tmp_image = np.zeros(
+            shape=(
+                max_h,
+                max_w, ), dtype='float32')
+        cv2.polylines(tmp_image, [np.array(key_point_xys).astype('int32')],
+                      False, 1.0)
+        ys, xs = np.where(tmp_image > 0)
+        xy_text = np.array(list(zip(xs, ys)), dtype='float32')
+        # NOTE(review): both "center" points are half-DIFFERENCES of quad
+        # corners rather than midpoints -- confirm this is intended.
+        left_center_pt = (
+            (min_area_quad[0] - min_area_quad[1]) / 2.0).reshape(1, 2)
+        right_center_pt = (
+            (min_area_quad[1] - min_area_quad[2]) / 2.0).reshape(1, 2)
+        proj_unit_vec = (right_center_pt - left_center_pt) / (
+            np.linalg.norm(right_center_pt - left_center_pt) + 1e-6)
+        proj_unit_vec_tile = np.tile(proj_unit_vec,
+                                     (xy_text.shape[0], 1))  # (n, 2)
+        left_center_pt_tile = np.tile(left_center_pt,
+                                      (xy_text.shape[0], 1))  # (n, 2)
+        xy_text_to_left_center = xy_text - left_center_pt_tile
+        # order the pixels by their projection onto the unit vector
+        proj_value = np.sum(xy_text_to_left_center * proj_unit_vec_tile, axis=1)
+        xy_text = xy_text[np.argsort(proj_value)]
+        # convert to np and keep the num of point not greater then fixed_point_num
+        pos_info = np.array(xy_text).reshape(-1, 2)[:, ::-1]  # xy-> yx
+        point_num = len(pos_info)
+        if point_num > fixed_point_num:
+            # evenly spaced subsample of fixed_point_num points
+            keep_ids = [
+                int((point_num * 1.0 / fixed_point_num) * x)
+                for x in range(fixed_point_num)
+            ]
+            pos_info = pos_info[keep_ids, :]
+        keep = int(min(len(pos_info), fixed_point_num))
+        # when the random draw is below 0.2 (and reference_height >= 3),
+        # column 0 (y) is shifted by a random offset within
+        # +-0.15 * reference_height, then both columns are clipped
+        if np.random.rand() < 0.2 and reference_height >= 3:
+            dl = (np.random.rand(keep) - 0.5) * reference_height * 0.3
+            random_float = np.array([1, 0]).reshape([1, 2]) * dl.reshape(
+                [keep, 1])
+            pos_info += random_float
+            pos_info[:, 0] = np.clip(pos_info[:, 0], 0, max_h - 1)
+            pos_info[:, 1] = np.clip(pos_info[:, 1], 0, max_w - 1)
+        # padding to fixed length
+        # NOTE(review): assumes keep <= self.tcl_len; otherwise the slice
+        # assignment below fails on a shape mismatch -- confirm with config.
+        pos_l = np.zeros((self.tcl_len, 3), dtype=np.int32)
+        pos_l[:, 0] = np.ones((self.tcl_len, )) * img_id
+        pos_m = np.zeros((self.tcl_len, 1), dtype=np.float32)
+        pos_l[:keep, 1:] = np.round(pos_info).astype(np.int32)
+        pos_m[:keep] = 1.0
+        return pos_l, pos_m
+    def generate_direction_map(self, poly_quads, n_char, direction_map):
+        """
+        Paint a direction label into ``direction_map`` for every quad.
+        Each quad is filled with (dx, dy, 1 / average_height), where (dx, dy)
+        is the unit vector from the midpoint of edge 0-3 to the midpoint of
+        edge 1-2, scaled by max(total quad width / n_char, 1.0).
+        :param poly_quads: sequence of quads, each indexed as [corner, xy]
+        :param n_char: divisor applied to the summed quad widths
+        :param direction_map: array drawn on in place
+        :return: direction_map
+        """
+        width_list = []
+        height_list = []
+        for quad in poly_quads:
+            quad_w = (np.linalg.norm(quad[0] - quad[1]) +
+                      np.linalg.norm(quad[2] - quad[3])) / 2.0
+            quad_h = (np.linalg.norm(quad[0] - quad[3]) +
+                      np.linalg.norm(quad[2] - quad[1])) / 2.0
+            width_list.append(quad_w)
+            height_list.append(quad_h)
+        norm_width = max(sum(width_list) / n_char, 1.0)
+        average_height = max(sum(height_list) / len(height_list), 1.0)
+        for quad in poly_quads:
+            direct_vector_full = (
+                (quad[1] + quad[2]) - (quad[0] + quad[3])) / 2.0
+            # 1e-6 guards against a zero-length direction vector
+            direct_vector = direct_vector_full / (
+                np.linalg.norm(direct_vector_full) + 1e-6) * norm_width
+            direction_label = tuple(
+                map(float,
+                    [direct_vector[0], direct_vector[1], 1.0 / average_height]))
+            cv2.fillPoly(direction_map,
+                         quad.round().astype(np.int32)[np.newaxis, :, :],
+                         direction_label)
+        return direction_map
+    def calculate_average_height(self, poly_quads):
+        """
+        Return the mean quad height, but never less than 1.0.
+        The height of a quad is the mean length of its edges 0-3 and 2-1.
+        """
+        heights = [
+            (np.linalg.norm(q[0] - q[3]) + np.linalg.norm(q[2] - q[1])) / 2.0
+            for q in poly_quads
+        ]
+        mean_height = sum(heights) / len(heights)
+        return max(mean_height, 1.0)
+    def generate_tcl_ctc_label(self,
+                               h,
+                               w,
+                               polys,
+                               tags,
+                               text_strs,
+                               ds_ratio,
+                               tcl_ratio=0.3,
+                               shrink_ratio_of_width=0.15):
+        """
+        Build the label maps and per-text point/label lists for one image.
+        Args:
+            h, w: size of the full-resolution score map; every other map is
+                allocated at int(h * ds_ratio) x int(w * ds_ratio).
+            polys: text polygons; multiplied by ds_ratio before use.
+            tags: per-polygon flags. A truthy tag only fills the training
+                mask with 0.15 inside the polygon; a falsy tag produces labels.
+            text_strs: per-polygon transcriptions, indexed like polys.
+            ds_ratio: down-sampling factor applied to h, w and polys.
+            tcl_ratio: ratio passed to poly2tcl; its inverse is passed to
+                shrink_poly_along_width as expand_height_ratio.
+            shrink_ratio_of_width: forwarded to shrink_poly_along_width.
+        Returns:
+            Tuple (score_map, score_label_map, tbo_map, direction_map,
+            training_mask, pos_list, pos_mask, label_list,
+            score_label_map_text_label_list).
+        """
+        # Full-resolution score map; it is resized back down at the end.
+        score_map_big = np.zeros(
+            (
+                h,
+                w, ), dtype=np.float32)
+        # From here on h, w and polys are expressed in down-sampled coordinates.
+        h, w = int(h * ds_ratio), int(w * ds_ratio)
+        polys = polys * ds_ratio
+        score_map = np.zeros(
+            (
+                h,
+                w, ), dtype=np.float32)
+        score_label_map = np.zeros(
+            (
+                h,
+                w, ), dtype=np.float32)
+        tbo_map = np.zeros((h, w, 5), dtype=np.float32)
+        training_mask = np.ones(
+            (
+                h,
+                w, ), dtype=np.float32)
+        # Every pixel of the direction map starts as (0, 0, 1).
+        direction_map = np.ones((h, w, 3)) * np.array([0, 0, 1]).reshape(
+            [1, 1, 3]).astype(np.float32)
+        label_idx = 0
+        score_label_map_text_label_list = []
+        pos_list, pos_mask, label_list = [], [], []
+        for poly_idx, poly_tag in enumerate(zip(polys, tags)):
+            poly = poly_tag[0]
+            tag = poly_tag[1]
+            # generate min_area_quad
+            min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly)
+            min_area_quad_h = 0.5 * (
+                np.linalg.norm(min_area_quad[0] - min_area_quad[3]) +
+                np.linalg.norm(min_area_quad[1] - min_area_quad[2]))
+            min_area_quad_w = 0.5 * (
+                np.linalg.norm(min_area_quad[0] - min_area_quad[1]) +
+                np.linalg.norm(min_area_quad[2] - min_area_quad[3]))
+            # Skip polygons whose shorter side is outside the configured
+            # [min_text_size, max_text_size] range (scaled by ds_ratio).
+            if min(min_area_quad_h, min_area_quad_w) < self.min_text_size * ds_ratio \
+                    or min(min_area_quad_h, min_area_quad_w) > self.max_text_size * ds_ratio:
+                continue
+            if tag:
+                # Tagged polygons only lower the training mask to 0.15.
+                cv2.fillPoly(training_mask,
+                             poly.astype(np.int32)[np.newaxis, :, :], 0.15)
+            else:
+                text_label = text_strs[poly_idx]
+                text_label = self.prepare_text_label(text_label,
+                                                     self.Lexicon_Table)
+                # One single-element list per character found in the lexicon;
+                # characters missing from the lexicon are dropped.
+                text_label_index_list = [[self.Lexicon_Table.index(c_)]
+                                         for c_ in text_label
+                                         if c_ in self.Lexicon_Table]
+                if len(text_label_index_list) < 1:
+                    continue
+                tcl_poly = self.poly2tcl(poly, tcl_ratio)
+                tcl_quads = self.poly2quads(tcl_poly)
+                poly_quads = self.poly2quads(poly)
+                stcl_quads, quad_index = self.shrink_poly_along_width(
+                    tcl_quads,
+                    shrink_ratio_of_width=shrink_ratio_of_width,
+                    expand_height_ratio=1.0 / tcl_ratio)
+                cv2.fillPoly(score_map,
+                             np.round(stcl_quads).astype(np.int32), 1.0)
+                # Same quads drawn at full resolution (coordinates un-scaled).
+                cv2.fillPoly(score_map_big,
+                             np.round(stcl_quads / ds_ratio).astype(np.int32),
+                             1.0)
+                for idx, quad in enumerate(stcl_quads):
+                    quad_mask = np.zeros((h, w), dtype=np.float32)
+                    quad_mask = cv2.fillPoly(
+                        quad_mask,
+                        np.round(quad[np.newaxis, :, :]).astype(np.int32), 1.0)
+                    # quad_index maps each shrunk quad back to its source quad.
+                    tbo_map = self.gen_quad_tbo(poly_quads[quad_index[idx]],
+                                                quad_mask, tbo_map)
+                # score label map and score_label_map_text_label_list for refine
+                if label_idx == 0:
+                    # Entry 0 of the list is a placeholder holding
+                    # len(Lexicon_Table); real labels start at index 1.
+                    text_pos_list_ = [[len(self.Lexicon_Table)], ]
+                    score_label_map_text_label_list.append(text_pos_list_)
+                label_idx += 1
+                cv2.fillPoly(score_label_map,
+                             np.round(poly_quads).astype(np.int32), label_idx)
+                score_label_map_text_label_list.append(text_label_index_list)
+                # direction info, fix-me
+                n_char = len(text_label_index_list)
+                direction_map = self.generate_direction_map(poly_quads, n_char,
+                                                            direction_map)
+                # pos info
+                average_shrink_height = self.calculate_average_height(
+                    stcl_quads)
+                pos_l, pos_m = self.fit_and_gather_tcl_points_v2(
+                    min_area_quad,
+                    poly,
+                    max_h=h,
+                    max_w=w,
+                    fixed_point_num=64,
+                    img_id=self.img_id,
+                    reference_height=average_shrink_height)
+                label_l = text_label_index_list
+                # Single-character texts still contribute to the maps above
+                # but are left out of pos_list / pos_mask / label_list.
+                if len(text_label_index_list) < 2:
+                    continue
+                pos_list.append(pos_l)
+                pos_mask.append(pos_m)
+                label_list.append(label_l)
+        # use big score_map for smooth tcl lines
+        score_map_big_resized = cv2.resize(
+            score_map_big, dsize=None, fx=ds_ratio, fy=ds_ratio)
+        # The score_map filled inside the loop is replaced by the thresholded,
+        # resized full-resolution map.
+        score_map = np.array(score_map_big_resized > 1e-3, dtype='float32')
+        return score_map, score_label_map, tbo_map, direction_map, training_mask, \
+               pos_list, pos_mask, label_list, score_label_map_text_label_list
+    def adjust_point(self, poly):
+        """
+        Shift the starting vertex of poly by one position when needed.
+        A 4-point poly is shifted when edges 1-2 and 3-0 together are more
+        than 1.5 times as long as edges 0-1 and 2-3. A poly with more than
+        4 points is shifted when the angle between its first two edge
+        vectors exceeds 70 degrees. Otherwise poly is returned unchanged.
+        """
+        n_pts = poly.shape[0]
+        if n_pts == 4:
+            edge_len = [
+                np.linalg.norm(poly[i] - poly[(i + 1) % 4]) for i in range(4)
+            ]
+            if (edge_len[0] + edge_len[2]) * 1.5 < (edge_len[1] + edge_len[3]):
+                poly = poly[[1, 2, 3, 0], :]
+            return poly
+        if n_pts > 4:
+            first_edge = poly[0] - poly[1]
+            second_edge = poly[1] - poly[2]
+            # 1e-6 keeps the division defined for zero-length edges.
+            denom = np.linalg.norm(first_edge) * np.linalg.norm(
+                second_edge) + 1e-6
+            cos_theta = np.dot(first_edge, second_edge) / denom
+            theta = np.arccos(np.round(cos_theta, decimals=4))
+            if abs(theta) > (70 / 180 * math.pi):
+                shifted = list(range(1, n_pts)) + [0]
+                poly = poly[np.array(shifted), :]
+        return poly
+    def gen_min_area_quad_from_poly(self, poly):
+        """
+        Return (min_area_quad, center_point) for poly.
+        A 4-point poly is returned as-is with the mean of its vertices as
+        center. Otherwise the quad is the cv2 minimum-area rectangle of the
+        poly, with its corners rotated so that they best line up with the
+        first, two middle and last points of the poly.
+        """
+        n_pts = poly.shape[0]
+        if n_pts == 4:
+            return poly, np.sum(poly, axis=0) / 4
+        # rect is (center (x, y), (width, height), angle of rotation)
+        rect = cv2.minAreaRect(poly.astype(np.int32))
+        corners = np.array(cv2.boxPoints(rect))
+        # Poly points that box corners i, i+1, i+2, i+3 are compared against.
+        anchors = (poly[0], poly[n_pts // 2 - 1], poly[n_pts // 2], poly[-1])
+        start_corner = 0
+        best_total = 1e4
+        for i in range(4):
+            total = np.linalg.norm(corners[(i + 0) % 4] - anchors[0]) + \
+                    np.linalg.norm(corners[(i + 1) % 4] - anchors[1]) + \
+                    np.linalg.norm(corners[(i + 2) % 4] - anchors[2]) + \
+                    np.linalg.norm(corners[(i + 3) % 4] - anchors[3])
+            if total < best_total:
+                best_total, start_corner = total, i
+        ordered_quad = np.zeros((4, 2), dtype=np.float32)
+        ordered_quad[:] = corners[[(start_corner + k) % 4 for k in range(4)]]
+        return ordered_quad, rect[0]
+    def shrink_quad_along_width(self,
+                                quad,
+                                begin_width_ratio=0.,
+                                end_width_ratio=1.):
+        """
+        Cut quad along its width and return the slice between two ratios.
+        Both ratios are measured from point 0 to point 1 on the upper edge
+        and from point 3 to point 2 on the lower edge. The result keeps
+        the point order (upper-begin, upper-end, lower-end, lower-begin).
+        """
+        ratios = np.array(
+            [[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
+        upper_begin, upper_end = quad[0] + (quad[1] - quad[0]) * ratios
+        lower_begin, lower_end = quad[3] + (quad[2] - quad[3]) * ratios
+        return np.array([upper_begin, upper_end, lower_end, lower_begin])
+    def shrink_poly_along_width(self,
+                                quads,
+                                shrink_ratio_of_width,
+                                expand_height_ratio=1.0):
+        """
+        Trim the same length off both ends of a chain of quads.
+        The trimmed length is shrink_ratio_of_width times the smallest of
+        the expanded left edge, the expanded right edge and the total
+        upper-edge length. Returns the trimmed quads as an array together
+        with the list of indices of the source quads they came from.
+        """
+
+        def get_cut_info(edge_len_list, cut_len):
+            # Walk along the edges until cut_len is used up; return the edge
+            # index and the fraction of that edge at which the cut falls.
+            remaining = cut_len
+            for pos, length in enumerate(edge_len_list):
+                remaining -= length
+                if remaining <= 0.000001:
+                    return pos, (remaining + length) / length
+
+        upper_lengths = [np.linalg.norm(q[0] - q[1]) for q in quads]
+        total_upper = sum(upper_lengths)
+        # length of left edge and right edge.
+        left_side = np.linalg.norm(quads[0][0] - quads[0][
+            3]) * expand_height_ratio
+        right_side = np.linalg.norm(quads[-1][1] - quads[-1][
+            2]) * expand_height_ratio
+        trim = min(left_side, right_side, total_upper) * shrink_ratio_of_width
+        first_idx, first_ratio = get_cut_info(upper_lengths, trim)
+        first_quad = self.shrink_quad_along_width(
+            quads[first_idx], begin_width_ratio=first_ratio, end_width_ratio=1)
+        last_idx, last_ratio = get_cut_info(upper_lengths, total_upper - trim)
+        last_quad = self.shrink_quad_along_width(
+            quads[last_idx], begin_width_ratio=0, end_width_ratio=last_ratio)
+        if first_idx == last_idx:
+            # Both cuts land in the same quad: merge them into one quad.
+            trimmed = [[first_quad[0], last_quad[1], last_quad[2],
+                        first_quad[3]]]
+        else:
+            trimmed = [first_quad]
+            trimmed.extend(quads[k] for k in range(first_idx + 1, last_idx))
+            trimmed.append(last_quad)
+        return np.array(trimmed), list(range(first_idx, last_idx + 1))
+    def prepare_text_label(self, label_str, Lexicon_Table):
+        """
+        Prepare text label for the given Lexicon_Table.
+        The label is lower-cased when the table has exactly 36 entries and
+        returned unchanged otherwise.
+        """
+        return label_str.lower() if len(Lexicon_Table) == 36 else label_str
+    def vector_angle(self, A, B):
+        """
+        Return the angle (radians, via arctan2) between the vector from A
+        to B and the positive x-axis. A and B are (x, y) points.
+        """
+        delta_yx = np.array([B[1] - A[1], B[0] - A[0]])
+        delta_y, delta_x = delta_yx
+        return np.arctan2(delta_y, delta_x)
+    def theta_line_cross_point(self, theta, point):
+        """
+        Return [a, b, c] of the line ax + by + c = 0 that passes through
+        point with direction angle theta.
+        """
+        px, py = point
+        cos_t, sin_t = np.cos(theta), np.sin(theta)
+        offset = cos_t * py - sin_t * px
+        return [sin_t, -cos_t, offset]
+    def line_cross_two_point(self, A, B):
+        """
+        Return the line through points A and B in ax + by + c = 0 form,
+        built from the angle of AB and anchored at A.
+        """
+        return self.theta_line_cross_point(self.vector_angle(A, B), A)
+    def average_angle(self, poly):
+        """
+        Return the mean of the angles of the left edge (point 3 to point 0)
+        and the right edge (point 2 to point 1) of a 4-point poly.
+        """
+        top_left, top_right, bottom_right, bottom_left = poly
+        left_edge_angle = self.vector_angle(bottom_left, top_left)
+        right_edge_angle = self.vector_angle(bottom_right, top_right)
+        return (left_edge_angle + right_edge_angle) / 2
+    def line_cross_point(self, line1, line2):
+        """
+        Return the intersection of two lines given as [a, b, c] with
+        0 = ax + by + c, as a float32 array [x, y].
+        When the determinant is exactly zero a message is printed and
+        [0, 0] is returned.
+        """
+        a1, b1, c1 = line1
+        a2, b2, c2 = line2
+        det = a1 * b2 - a2 * b1
+        if det == 0:
+            print('Cross point does not exist')
+            return np.array([0, 0], dtype=np.float32)
+        cross_x = (b1 * c2 - b2 * c1) / det
+        cross_y = (a2 * c1 - a1 * c2) / det
+        return np.array([cross_x, cross_y], dtype=np.float32)
+    def quad2tcl(self, poly, ratio):
+        """
+        Return the center band of a clock-wise 4-point poly, shape (4, 2).
+        The band keeps the full width and the middle `ratio` share of the
+        two side edges (0-3 and 1-2).
+        """
+        band = np.array(
+            [[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32)
+        left_top, left_bottom = poly[0] + (poly[3] - poly[0]) * band
+        right_top, right_bottom = poly[1] + (poly[2] - poly[1]) * band
+        return np.array([left_top, right_top, right_bottom, left_bottom])
+    def poly2tcl(self, poly, ratio):
+        """
+        Return the center band of a clock-wise poly, same shape as poly.
+        Point idx is paired with point (point_num - 1 - idx); each pair is
+        replaced by the two points bounding the middle `ratio` share of
+        the segment joining them.
+        """
+        band = np.array(
+            [[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32)
+        center_poly = np.zeros_like(poly)
+        n_pts = poly.shape[0]
+        for upper in range(n_pts // 2):
+            lower = n_pts - 1 - upper
+            near_upper, near_lower = poly[upper] + (poly[lower] - poly[upper]
+                                                    ) * band
+            center_poly[upper] = near_upper
+            center_poly[lower] = near_lower
+        return center_poly
+    def gen_quad_tbo(self, quad, tcl_mask, tbo_map):
+        """
+        Fill tbo_map for every pixel where tcl_mask equals 1 and return it.
+        Channels 0/1 hold the (y, x) offset from the pixel to the upper
+        edge of quad, channels 2/3 the (y, x) offset to the lower edge,
+        both measured along the average side-edge angle. Channel 4 holds
+        2 / max(min(quad height, quad width), 1).
+        """
+        # Upper and lower edge as line functions ax + by + c = 0.
+        top_line = self.line_cross_two_point(quad[0], quad[1])
+        bottom_line = self.line_cross_two_point(quad[3], quad[2])
+        height = 0.5 * (np.linalg.norm(quad[0] - quad[3]) +
+                        np.linalg.norm(quad[1] - quad[2]))
+        width = 0.5 * (np.linalg.norm(quad[0] - quad[1]) +
+                       np.linalg.norm(quad[2] - quad[3]))
+        # Same value for every pixel of this quad.
+        size_norm = 1.0 / max(min(height, width), 1.0) * 2
+        # Average angle of the left and right edge.
+        side_angle = self.average_angle(quad)
+        for row, col in np.argwhere(tcl_mask == 1):
+            pixel = (col, row)
+            probe = self.theta_line_cross_point(side_angle, pixel)
+            to_top = self.line_cross_point(top_line, probe) - pixel
+            to_bottom = self.line_cross_point(bottom_line, probe) - pixel
+            # Offsets are stored y first, then x.
+            tbo_map[row, col, 0] = to_top[1]
+            tbo_map[row, col, 1] = to_top[0]
+            tbo_map[row, col, 2] = to_bottom[1]
+            tbo_map[row, col, 3] = to_bottom[0]
+            tbo_map[row, col, 4] = size_norm
+        return tbo_map
+    def poly2quads(self, poly):
+        """
+        Split a clock-wise poly into consecutive quads.
+        Point idx is paired with point (point_num - 1 - idx); every two
+        neighbouring pairs form one quad ordered clock-wise as
+        (upper idx, upper idx + 1, lower idx + 1, lower idx).
+        Returns an array holding point_num // 2 - 1 quads (empty when the
+        poly has fewer than two pairs).
+        """
+        point_num = poly.shape[0]
+        pair_num = point_num // 2
+        # Build the pair array once instead of once per quad.
+        point_pairs = np.array(
+            [[poly[idx], poly[point_num - 1 - idx]] for idx in range(pair_num)])
+        quad_list = [
+            # reshape and adjust to clock-wise
+            point_pairs[[idx, idx + 1]].reshape(4, 2)[[0, 2, 3, 1]]
+            for idx in range(pair_num - 1)
+        ]
+        return np.array(quad_list)
+    def rotate_im_poly(self, im, text_polys):
+        """
+        Rotate the image by one or three np.rot90 quarter turns, picked at
+        random with equal chance, and rotate the first four points of each
+        polygon accordingly.
+        Returns the rotated image and a float32 array of rotated polygons.
+        """
+        src_w, src_h = im.shape[1], im.shape[0]
+        rotated = im.copy()
+        quarter_turns = 3 if np.random.rand() > 0.5 else 1
+        for _ in range(quarter_turns):
+            rotated = np.rot90(rotated)
+        angle = (-90 * quarter_turns) * math.pi / 180.0
+        cos_a, sin_a = math.cos(angle), math.sin(angle)
+        src_cx, src_cy = 0.5 * src_w, 0.5 * src_h
+        dst_cx, dst_cy = 0.5 * rotated.shape[1], 0.5 * rotated.shape[0]
+        rotated_polys = []
+        for box in (text_polys[i] for i in range(text_polys.shape[0])):
+            corners = []
+            for j in range(4):  # 16->4
+                off_x, off_y = box[j][0] - src_cx, box[j][1] - src_cy
+                # Rotate around the source center, then move to the new center.
+                corners.append([
+                    cos_a * off_x - sin_a * off_y + dst_cx,
+                    sin_a * off_x + cos_a * off_y + dst_cy
+                ])
+            rotated_polys.append(corners)
+        return rotated, np.array(rotated_polys, dtype=np.float32)
+    def __call__(self, data):
+        """
+        Augment one sample and attach the training targets to it.
+        Reads data['image'], data['polys'], data['ignore_tags'] and
+        data['texts']; writes 'images', 'tcl_maps', 'tcl_label_maps',
+        'border_maps', 'direction_maps', 'training_masks', 'label_list',
+        'pos_list' and 'pos_mask'.
+        Returns the updated dict, or None when the sample is rejected (no
+        valid polygon, image too small, every polygon tagged as ignored,
+        no usable label, or more texts than self.max_text_nums).
+        """
+        # Side length of the square network input.
+        input_size = 512
+        im = data['image']
+        text_polys = data['polys']
+        text_tags = data['ignore_tags']
+        text_strs = data['texts']
+        h, w, _ = im.shape
+        text_polys, text_tags, hv_tags = self.check_and_validate_polys(
+            text_polys, text_tags, (h, w))
+        if text_polys.shape[0] <= 0:
+            return None
+        # set aspect ratio and keep area fix
+        asp_scales = np.arange(1.0, 1.55, 0.1)
+        asp_scale = np.random.choice(asp_scales)
+        if np.random.rand() < 0.5:
+            asp_scale = 1.0 / asp_scale
+        # Width is scaled by sqrt(s) and height by 1 / sqrt(s).
+        asp_scale = math.sqrt(asp_scale)
+        asp_wx = asp_scale
+        asp_hy = 1.0 / asp_scale
+        im = cv2.resize(im, dsize=None, fx=asp_wx, fy=asp_hy)
+        text_polys[:, :, 0] *= asp_wx
+        text_polys[:, :, 1] *= asp_hy
+        h, w, _ = im.shape
+        # Cap the longer side at 2048 pixels.
+        if max(h, w) > 2048:
+            rd_scale = 2048.0 / max(h, w)
+            im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
+            text_polys *= rd_scale
+        h, w, _ = im.shape
+        if min(h, w) < 16:
+            return None
+        # no background
+        im, text_polys, text_tags, hv_tags, text_strs = self.crop_area(
+            im,
+            text_polys,
+            text_tags,
+            hv_tags,
+            text_strs,
+            crop_background=False)
+        if text_polys.shape[0] == 0:
+            return None
+        # # continue for all ignore case
+        if np.sum((text_tags * 1.0)) >= text_tags.size:
+            return None
+        new_h, new_w, _ = im.shape
+        if (new_h is None) or (new_w is None):
+            return None
+        # resize image
+        # std_ratio makes the longer side equal input_size; a random factor
+        # from rand_scales (1.0 listed five times) then shrinks it further.
+        std_ratio = float(input_size) / max(new_w, new_h)
+        rand_scales = np.array(
+            [0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0, 1.0, 1.0, 1.0, 1.0])
+        rz_scale = std_ratio * np.random.choice(rand_scales)
+        im = cv2.resize(im, dsize=None, fx=rz_scale, fy=rz_scale)
+        text_polys[:, :, 0] *= rz_scale
+        text_polys[:, :, 1] *= rz_scale
+        # add gaussian blur
+        if np.random.rand() < 0.1 * 0.5:
+            ks = np.random.permutation(5)[0] + 1
+            # Force an odd kernel size.
+            ks = int(ks / 2) * 2 + 1
+            im = cv2.GaussianBlur(im, ksize=(ks, ks), sigmaX=0, sigmaY=0)
+        # add brighter
+        if np.random.rand() < 0.1 * 0.5:
+            im = im * (1.0 + np.random.rand() * 0.5)
+            im = np.clip(im, 0.0, 255.0)
+        # add darker
+        if np.random.rand() < 0.1 * 0.5:
+            im = im * (1.0 - np.random.rand() * 0.5)
+            im = np.clip(im, 0.0, 255.0)
+        # Padding the im to [input_size, input_size]
+        new_h, new_w, _ = im.shape
+        if min(new_w, new_h) < input_size * 0.5:
+            return None
+        # The canvas is pre-filled with the per-channel mean values, so the
+        # padded area becomes zero after the mean subtraction below.
+        im_padded = np.ones((input_size, input_size, 3), dtype=np.float32)
+        im_padded[:, :, 2] = 0.485 * 255
+        im_padded[:, :, 1] = 0.456 * 255
+        im_padded[:, :, 0] = 0.406 * 255
+        # Random the start position
+        del_h = input_size - new_h
+        del_w = input_size - new_w
+        sh, sw = 0, 0
+        if del_h > 1:
+            sh = int(np.random.rand() * del_h)
+        if del_w > 1:
+            sw = int(np.random.rand() * del_w)
+        # Padding
+        im_padded[sh:sh + new_h, sw:sw + new_w, :] = im.copy()
+        text_polys[:, :, 0] += sw
+        text_polys[:, :, 1] += sh
+        score_map, score_label_map, border_map, direction_map, training_mask, \
+        pos_list, pos_mask, label_list, score_label_map_text_label = self.generate_tcl_ctc_label(input_size,
+                                                                                                 input_size,
+                                                                                                 text_polys,
+                                                                                                 text_tags,
+                                                                                                 text_strs, 0.25)
+        if len(label_list) <= 0:  # eliminate negative samples
+            return None
+        # Filler entries used to pad the lists up to self.max_text_nums.
+        pos_list_temp = np.zeros([64, 3])
+        pos_mask_temp = np.zeros([64, 1])
+        label_list_temp = np.zeros([self.max_text_length, 1]) + self.pad_num
+        # Truncate or pad every label to exactly self.max_text_length entries.
+        for i, label in enumerate(label_list):
+            n = len(label)
+            if n > self.max_text_length:
+                label_list[i] = label[:self.max_text_length]
+                continue
+            while n < self.max_text_length:
+                label.append([self.pad_num])
+                n += 1
+        for i in range(len(label_list)):
+            label_list[i] = np.array(label_list[i])
+        if len(pos_list) <= 0 or len(pos_list) > self.max_text_nums:
+            return None
+        for __ in range(self.max_text_nums - len(pos_list), 0, -1):
+            pos_list.append(pos_list_temp)
+            pos_mask.append(pos_mask_temp)
+            label_list.append(label_list_temp)
+        # img_id counts accepted samples and wraps around at batch_size.
+        if self.img_id == self.batch_size - 1:
+            self.img_id = 0
+        else:
+            self.img_id += 1
+        # Per-channel normalisation: subtract the mean, divide by the std.
+        im_padded[:, :, 2] -= 0.485 * 255
+        im_padded[:, :, 1] -= 0.456 * 255
+        im_padded[:, :, 0] -= 0.406 * 255
+        im_padded[:, :, 2] /= (255.0 * 0.229)
+        im_padded[:, :, 1] /= (255.0 * 0.224)
+        im_padded[:, :, 0] /= (255.0 * 0.225)
+        # HWC -> CHW, then reverse the channel order.
+        im_padded = im_padded.transpose((2, 0, 1))
+        images = im_padded[::-1, :, :]
+        tcl_maps = score_map[np.newaxis, :, :]
+        tcl_label_maps = score_label_map[np.newaxis, :, :]
+        border_maps = border_map.transpose((2, 0, 1))
+        direction_maps = direction_map.transpose((2, 0, 1))
+        training_masks = training_mask[np.newaxis, :, :]
+        pos_list = np.array(pos_list)
+        pos_mask = np.array(pos_mask)
+        label_list = np.array(label_list)
+        data['images'] = images
+        data['tcl_maps'] = tcl_maps
+        data['tcl_label_maps'] = tcl_label_maps
+        data['border_maps'] = border_maps
+        data['direction_maps'] = direction_maps
+        data['training_masks'] = training_masks
+        data['label_list'] = label_list
+        data['pos_list'] = pos_list
+        data['pos_mask'] = pos_mask
+        return data
--- a/ppocr/data/imaug/rec_img_aug.py
+++ b/ppocr/data/imaug/rec_img_aug.py
@@ -16,7 +16,7 @@ import math
 import cv2
 import numpy as np
 import random
+from PIL import Image
 from .text_image_aug import tia_perspective, tia_stretch, tia_distort
@@ -43,6 +43,25 @@ class ClsResizeImg(object):
        return data
+class NRTRRecResizeImg(object):
+    """
+    Resize data['image'] to a fixed shape and rescale it with x / 128 - 1.
+    Args:
+        image_shape: target size handed to the resize call as-is.
+        resize_type: 'PIL' or 'OpenCV'; any other value skips the resize.
+    """
+    def __init__(self, image_shape, resize_type, **kwargs):
+        self.image_shape = image_shape
+        self.resize_type = resize_type
+    def __call__(self, data):
+        img = data['image']
+        if self.resize_type == 'PIL':
+            image_pil = Image.fromarray(np.uint8(img))
+            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
+            # filter under its current name.
+            img = image_pil.resize(self.image_shape, Image.LANCZOS)
+            img = np.array(img)
+        if self.resize_type == 'OpenCV':
+            img = cv2.resize(img, self.image_shape)
+        # NOTE(review): appending one axis and moving it to the front
+        # presumably expects a 2-D (single-channel) image - confirm.
+        norm_img = np.expand_dims(img, -1)
+        norm_img = norm_img.transpose((2, 0, 1))
+        data['image'] = norm_img.astype(np.float32) / 128. - 1.
+        return data
 class RecResizeImg(object):
    def __init__(self,
                 image_shape,

--- a/ppocr/data/pgnet_dataset.py
+++ b/ppocr/data/pgnet_dataset.py
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import os
+from paddle.io import Dataset
+from .imaug import transform, create_operators
+import random
+class PGDataSet(Dataset):
+    """
+    Dataset over label files whose lines are "<file name><delimiter><label>".
+    Args:
+        config: full config dict; reads config['Global'],
+            config[mode]['dataset'] and config[mode]['loader'].
+        mode: config section name; "train" (case-insensitive) enables
+            shuffling, "eval" enables img_id parsing.
+        logger: logger used for progress and error messages.
+        seed: seed for the random module used when sampling and shuffling.
+    """
+    def __init__(self, config, mode, logger, seed=None):
+        super(PGDataSet, self).__init__()
+        self.logger = logger
+        self.seed = seed
+        self.mode = mode
+        global_config = config['Global']
+        dataset_config = config[mode]['dataset']
+        loader_config = config[mode]['loader']
+        self.delimiter = dataset_config.get('delimiter', '\t')
+        label_file_list = dataset_config.pop('label_file_list')
+        data_source_num = len(label_file_list)
+        ratio_list = dataset_config.get("ratio_list", [1.0])
+        # A single number is applied to every label file.
+        if isinstance(ratio_list, (float, int)):
+            ratio_list = [float(ratio_list)] * int(data_source_num)
+        assert len(
+            ratio_list
+        ) == data_source_num, "The length of ratio_list should be the same as the file_list."
+        self.data_dir = dataset_config['data_dir']
+        self.do_shuffle = loader_config['shuffle']
+        logger.info("Initialize indexs of datasets:%s" % label_file_list)
+        self.data_lines = self.get_image_info_list(label_file_list, ratio_list)
+        self.data_idx_order_list = list(range(len(self.data_lines)))
+        if mode.lower() == "train":
+            self.shuffle_data_random()
+        self.ops = create_operators(dataset_config['transforms'], global_config)
+    def shuffle_data_random(self):
+        """Shuffle the data lines in place when shuffling is enabled."""
+        if self.do_shuffle:
+            random.seed(self.seed)
+            random.shuffle(self.data_lines)
+        return
+    def get_image_info_list(self, file_list, ratio_list):
+        """
+        Read all label files and return their raw (bytes) lines.
+        In "train" mode, or when a file's ratio is below 1.0, a random
+        sample of round(len(lines) * ratio) lines is taken from that file.
+        """
+        if isinstance(file_list, str):
+            file_list = [file_list]
+        data_lines = []
+        for idx, file in enumerate(file_list):
+            with open(file, "rb") as f:
+                lines = f.readlines()
+                if self.mode == "train" or ratio_list[idx] < 1.0:
+                    random.seed(self.seed)
+                    lines = random.sample(lines,
+                                          round(len(lines) * ratio_list[idx]))
+                data_lines.extend(lines)
+        return data_lines
+    def __getitem__(self, idx):
+        """
+        Load and transform sample idx.
+        On any failure, or when the transforms return None, the error is
+        logged and a randomly chosen sample is returned instead.
+        """
+        file_idx = self.data_idx_order_list[idx]
+        data_line = self.data_lines[file_idx]
+        img_id = 0
+        try:
+            data_line = data_line.decode('utf-8')
+            substr = data_line.strip("\n").split(self.delimiter)
+            file_name = substr[0]
+            label = substr[1]
+            img_path = os.path.join(self.data_dir, file_name)
+            if self.mode.lower() == 'eval':
+                # The id is parsed from the characters after the first 7 of
+                # the text before the first "."; int() is the only step that
+                # can fail, so only ValueError is caught.
+                try:
+                    img_id = int(data_line.split(".")[0][7:])
+                except ValueError:
+                    img_id = 0
+            data = {'img_path': img_path, 'label': label, 'img_id': img_id}
+            if not os.path.exists(img_path):
+                raise Exception("{} does not exist!".format(img_path))
+            with open(data['img_path'], 'rb') as f:
+                img = f.read()
+                data['image'] = img
+            outs = transform(data, self.ops)
+        except Exception as e:
+            self.logger.error(
+                "When parsing line {}, error happened with msg: {}".format(
+                    self.data_idx_order_list[idx], e))
+            outs = None
+        if outs is None:
+            return self.__getitem__(np.random.randint(self.__len__()))
+        return outs
+    def __len__(self):
+        return len(self.data_idx_order_list)
--- a/ppocr/data/pubtab_dataset.py
+++ b/ppocr/data/pubtab_dataset.py
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import os
+import random
+from paddle.io import Dataset
+import json
+from .imaug import transform, create_operators
+class PubTabDataSet(Dataset):
+    """
+    Dataset over a label file with one JSON record per line. Each record
+    provides 'filename' and 'html' (with 'cells' and 'structure').
+    Args:
+        config: full config dict; reads config['Global'],
+            config[mode]['dataset'] and config[mode]['loader'].
+        mode: config section name; "train" (case-insensitive) enables
+            shuffling.
+        logger: logger used for progress and error messages.
+        seed: seed for the random module used when shuffling.
+    """
+    def __init__(self, config, mode, logger, seed=None):
+        super(PubTabDataSet, self).__init__()
+        self.logger = logger
+        global_config = config['Global']
+        dataset_config = config[mode]['dataset']
+        loader_config = config[mode]['loader']
+        label_file_path = dataset_config.pop('label_file_path')
+        self.data_dir = dataset_config['data_dir']
+        self.do_shuffle = loader_config['shuffle']
+        self.do_hard_select = False
+        if 'hard_select' in loader_config:
+            self.do_hard_select = loader_config['hard_select']
+            self.hard_prob = loader_config['hard_prob']
+        if self.do_hard_select:
+            # NOTE(review): load_hard_select_prob is not defined in this
+            # class; confirm it is provided elsewhere before enabling
+            # hard_select.
+            self.img_select_prob = self.load_hard_select_prob()
+        self.table_select_type = None
+        if 'table_select_type' in loader_config:
+            self.table_select_type = loader_config['table_select_type']
+            self.table_select_prob = loader_config['table_select_prob']
+        self.seed = seed
+        logger.info("Initialize indexs of datasets:%s" % label_file_path)
+        with open(label_file_path, "rb") as f:
+            self.data_lines = f.readlines()
+        self.data_idx_order_list = list(range(len(self.data_lines)))
+        if mode.lower() == "train":
+            self.shuffle_data_random()
+        self.ops = create_operators(dataset_config['transforms'], global_config)
+    def shuffle_data_random(self):
+        """Shuffle the data lines in place when shuffling is enabled."""
+        if self.do_shuffle:
+            random.seed(self.seed)
+            random.shuffle(self.data_lines)
+        return
+    def __getitem__(self, idx):
+        """
+        Load and transform sample idx.
+        A sample may be skipped by the hard-select or table-type sampling;
+        skipped, failed or None-transformed samples are replaced by a
+        randomly chosen one.
+        """
+        # Bound before the try so the error log below can always format it,
+        # even when reading self.data_lines[idx] is what failed.
+        data_line = None
+        try:
+            data_line = self.data_lines[idx]
+            data_line = data_line.decode('utf-8').strip("\n")
+            info = json.loads(data_line)
+            file_name = info['filename']
+            select_flag = True
+            if self.do_hard_select:
+                prob = self.img_select_prob[file_name]
+                if prob < random.uniform(0, 1):
+                    select_flag = False
+            if self.table_select_type:
+                structure = info['html']['structure']['tokens'].copy()
+                structure_str = ''.join(structure)
+                # A table counts as complex when any token spans cells.
+                table_type = "simple"
+                if 'colspan' in structure_str or 'rowspan' in structure_str:
+                    table_type = "complex"
+                if table_type == "complex":
+                    if self.table_select_prob < random.uniform(0, 1):
+                        select_flag = False
+            if select_flag:
+                cells = info['html']['cells'].copy()
+                structure = info['html']['structure'].copy()
+                img_path = os.path.join(self.data_dir, file_name)
+                data = {'img_path': img_path, 'cells': cells, 'structure':structure}
+                if not os.path.exists(img_path):
+                    raise Exception("{} does not exist!".format(img_path))
+                with open(data['img_path'], 'rb') as f:
+                    img = f.read()
+                    data['image'] = img
+                outs = transform(data, self.ops)
+            else:
+                outs = None
+        except Exception as e:
+            self.logger.error(
+                "When parsing line {}, error happened with msg: {}".format(
+                    data_line, e))
+            outs = None
+        if outs is None:
+            return self.__getitem__(np.random.randint(self.__len__()))
+        return outs
+    def __len__(self):
+        return len(self.data_idx_order_list)
--- a/ppocr/data/simple_dataset.py
+++ b/ppocr/data/simple_dataset.py
@@ -69,6 +69,36 @@ class SimpleDataSet(Dataset):
        random.shuffle(self.data_lines)
        return
+    def get_ext_data(self):
+        """Randomly load the extra samples requested by the transform ops.
+
+        The number of samples is taken from the first op that exposes an
+        ``ext_data_num`` attribute (0 when no op has one). Each extra sample
+        is read from a randomly chosen data line and run through the first
+        two ops only. A sample whose image file is missing, or for which the
+        transform returns ``None``, is skipped and another one is drawn.
+
+        Returns:
+            list: the transformed extra samples.
+        """
+        wanted = next((getattr(op, 'ext_data_num')
+                       for op in self.ops if hasattr(op, 'ext_data_num')), 0)
+        head_ops = self.ops[:2]
+        collected = []
+        while len(collected) < wanted:
+            pick = np.random.randint(self.__len__())
+            raw_line = self.data_lines[self.data_idx_order_list[pick]]
+            fields = raw_line.decode('utf-8').strip("\n").split(self.delimiter)
+            file_name, label = fields[0], fields[1]
+            img_path = os.path.join(self.data_dir, file_name)
+            if not os.path.exists(img_path):
+                continue
+            sample = {'img_path': img_path, 'label': label}
+            with open(img_path, 'rb') as f:
+                sample['image'] = f.read()
+            sample = transform(sample, head_ops)
+            if sample is not None:
+                collected.append(sample)
+        return collected
    def __getitem__(self, idx):
        file_idx = self.data_idx_order_list[idx]
        data_line = self.data_lines[file_idx]
@@ -84,6 +114,7 @@ class SimpleDataSet(Dataset):
            with open(data['img_path'], 'rb') as f:
                img = f.read()
                data['image'] = img
+            data['ext_data'] = self.get_ext_data()
            outs = transform(data, self.ops)
        except Exception as e:
            self.logger.error(

--- a/ppocr/losses/__init__.py
+++ b/ppocr/losses/__init__.py
@@ -13,25 +13,38 @@
 # limitations under the License.
 import copy
+import paddle
+import paddle.nn as nn
+# det loss
+from .det_db_loss import DBLoss
+from .det_east_loss import EASTLoss
+from .det_sast_loss import SASTLoss
-def build_loss(config):
+# rec loss
-    # det loss
+from .rec_ctc_loss import CTCLoss
-    from .det_db_loss import DBLoss
+from .rec_att_loss import AttentionLoss
-    from .det_east_loss import EASTLoss
+from .rec_srn_loss import SRNLoss
-    from .det_sast_loss import SASTLoss
+from .rec_nrtr_loss import NRTRLoss
+# cls loss
+from .cls_loss import ClsLoss
+# e2e loss
+from .e2e_pg_loss import PGLoss
-    # rec loss
+# basic loss function
-    from .rec_ctc_loss import CTCLoss
+from .basic_loss import DistanceLoss
-    from .rec_att_loss import AttentionLoss
-    from .rec_srn_loss import SRNLoss
-    # cls loss
+# combined loss function
-    from .cls_loss import ClsLoss
+from .combined_loss import CombinedLoss
+# table loss
+from .table_att_loss import TableAttentionLoss
+def build_loss(config):
    support_dict = [
        'DBLoss', 'EASTLoss', 'SASTLoss', 'CTCLoss', 'ClsLoss', 'AttentionLoss',
-        'SRNLoss'
+        'SRNLoss', 'PGLoss', 'CombinedLoss', 'NRTRLoss', 'TableAttentionLoss'
    ]
    config = copy.deepcopy(config)

--- a/ppocr/losses/basic_loss.py
+++ b/ppocr/losses/basic_loss.py
+#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import L1Loss
+from paddle.nn import MSELoss as L2Loss
+from paddle.nn import SmoothL1Loss
+class CELoss(nn.Layer):
+    """Cross-entropy loss with optional label smoothing.
+
+    Args:
+        epsilon: label-smoothing factor. ``None``, or a value that is
+            ``<= 0`` or ``>= 1``, disables smoothing.
+    """
+    def __init__(self, epsilon=None):
+        super().__init__()
+        out_of_range = epsilon is not None and (epsilon <= 0 or epsilon >= 1)
+        self.epsilon = None if out_of_range else epsilon
+    def _labelsmoothing(self, target, class_num):
+        """Return the smoothed target reshaped to ``[-1, class_num]``."""
+        # A target whose last dimension already equals class_num is used as-is.
+        needs_one_hot = target.shape[-1] != class_num
+        dense = F.one_hot(target, class_num) if needs_one_hot else target
+        smoothed = F.label_smooth(dense, epsilon=self.epsilon)
+        return paddle.reshape(smoothed, shape=[-1, class_num])
+    def forward(self, x, label):
+        """Compute the loss of logits ``x`` against ``label``.
+
+        Without smoothing this delegates to ``F.cross_entropy``; a label whose
+        last dimension matches ``x`` is softmax-ed and used as a soft label.
+        With smoothing, the summed product of the negative log-softmax and
+        the smoothed label over the last axis is returned.
+        """
+        if self.epsilon is None:
+            soft_label = label.shape[-1] == x.shape[-1]
+            if soft_label:
+                label = F.softmax(label, axis=-1)
+            return F.cross_entropy(x, label=label, soft_label=soft_label)
+        class_num = x.shape[-1]
+        label = self._labelsmoothing(label, class_num)
+        neg_log_prob = -F.log_softmax(x, axis=-1)
+        return paddle.sum(neg_log_prob * label, axis=-1)
+class KLJSLoss(object):
+    """KL-style or JS-style divergence between two tensors.
+
+    Args:
+        mode: ``'kl'``/``'KL'`` for the one-directional term, ``'js'``/``'JS'``
+            for the average of both directions.
+    """
+    def __init__(self, mode='kl'):
+        assert mode in ['kl', 'js', 'KL', 'JS'], "mode can only be one of ['kl', 'js', 'KL', 'JS']"
+        self.mode = mode
+    def __call__(self, p1, p2, reduction="mean"):
+        """Return the divergence of ``p1`` and ``p2``.
+
+        ``reduction`` of ``"mean"`` averages over axes 1 and 2, ``"none"`` or
+        ``None`` returns the element-wise tensor, and any other value sums
+        over axes 1 and 2.
+        """
+        eps = 1e-5
+        divergence = paddle.multiply(
+            p2, paddle.log((p2 + eps) / (p1 + eps) + eps))
+        if self.mode.lower() == "js":
+            reverse = paddle.multiply(
+                p1, paddle.log((p1 + eps) / (p2 + eps) + eps))
+            divergence = (divergence + reverse) * 0.5
+        if reduction == "mean":
+            return paddle.mean(divergence, axis=[1, 2])
+        if reduction == "none" or reduction is None:
+            return divergence
+        return paddle.sum(divergence, axis=[1, 2])
+class DMLLoss(nn.Layer):
+    """Mutual-learning loss between two outputs.
+
+    Args:
+        act: optional activation applied to both inputs first; one of
+            ``"softmax"``, ``"sigmoid"`` or ``None``.
+    """
+    def __init__(self, act=None):
+        super().__init__()
+        assert act is None or act in ["softmax", "sigmoid"]
+        activation = None
+        if act == "softmax":
+            activation = nn.Softmax(axis=-1)
+        elif act == "sigmoid":
+            activation = nn.Sigmoid()
+        self.act = activation
+        self.jskl_loss = KLJSLoss(mode="js")
+    def forward(self, out1, out2):
+        """Return the symmetric divergence between ``out1`` and ``out2``.
+
+        Inputs with fewer than two dimensions use the average of the two
+        ``F.kl_div`` directions; all other inputs go through the JS-mode
+        ``KLJSLoss`` with its default reduction.
+        """
+        if self.act is not None:
+            out1, out2 = self.act(out1), self.act(out2)
+        if len(out1.shape) >= 2:
+            return self.jskl_loss(out1, out2)
+        kl_forward = F.kl_div(paddle.log(out1), out2, reduction='batchmean')
+        kl_backward = F.kl_div(paddle.log(out2), out1, reduction='batchmean')
+        return (kl_forward + kl_backward) / 2.0
+class DistanceLoss(nn.Layer):
+    """Distance loss between two tensors.
+
+    Args:
+        mode: ``"l1"``, ``"l2"`` or ``"smooth_l1"``; selects ``nn.L1Loss``,
+            ``nn.MSELoss`` or ``nn.SmoothL1Loss`` respectively.
+        **kargs: forwarded unchanged to the selected paddle loss class.
+    """
+    def __init__(self, mode="l2", **kargs):
+        super().__init__()
+        assert mode in ["l1", "l2", "smooth_l1"]
+        builders = {
+            "l1": nn.L1Loss,
+            "l2": nn.MSELoss,
+            "smooth_l1": nn.SmoothL1Loss,
+        }
+        builder = builders.get(mode)
+        if builder is not None:
+            self.loss_func = builder(**kargs)
+    def forward(self, x, y):
+        """Apply the selected distance loss to ``x`` and ``y``."""
+        distance = self.loss_func(x, y)
+        return distance
--- a/ppocr/losses/cls_loss.py
+++ b/ppocr/losses/cls_loss.py
@@ -24,7 +24,7 @@ class ClsLoss(nn.Layer):
        super(ClsLoss, self).__init__()
        self.loss_func = nn.CrossEntropyLoss(reduction='mean')
-    def __call__(self, predicts, batch):
+    def forward(self, predicts, batch):
-        label = batch[1]
+        label = batch[1].astype("int64")
        loss = self.loss_func(input=predicts, label=label)
        return {'loss': loss}
--- a/ppocr/losses/combined_loss.py
+++ b/ppocr/losses/combined_loss.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddle
+import paddle.nn as nn
+from .distillation_loss import DistillationCTCLoss
+from .distillation_loss import DistillationDMLLoss
+from .distillation_loss import DistillationDistanceLoss, DistillationDBLoss, DistillationDilaDBLoss
+class CombinedLoss(nn.Layer):
+    """
+    CombinedLoss:
+        a weighted combination of loss functions.
+
+    Args:
+        loss_config_list: list of single-key dicts ``{LossName: params}``.
+            Every ``params`` dict must contain a ``"weight"`` entry; the
+            remaining entries are passed to the loss constructor.
+    """
+    def __init__(self, loss_config_list=None):
+        super().__init__()
+        self.loss_func = []
+        self.loss_weight = []
+        assert isinstance(loss_config_list, list), (
+            'operator config should be a list')
+        for config in loss_config_list:
+            assert isinstance(config,
+                              dict) and len(config) == 1, "yaml format error"
+            name = list(config)[0]
+            # Work on a copy so popping "weight" leaves the caller's config intact.
+            param = dict(config[name])
+            assert "weight" in param, "weight must be in param, but param just contains {}".format(
+                param.keys())
+            self.loss_weight.append(param.pop("weight"))
+            # NOTE(review): eval() runs whatever expression the config names;
+            # only use with trusted configuration files.
+            self.loss_func.append(eval(name)(**param))
+    def forward(self, input, batch, **kargs):
+        """Run every sub-loss and return the weighted total plus components.
+
+        Returns:
+            dict: ``"loss"`` holds the weighted sum. Every other entry of a
+            sub-loss result is reported unweighted under ``"<key>_<idx>"``.
+        """
+        loss_dict = {}
+        loss_all = 0.
+        for idx, loss_func in enumerate(self.loss_func):
+            loss = loss_func(input, batch, **kargs)
+            if isinstance(loss, paddle.Tensor):
+                # Key a bare tensor by the loss class name, not the tensor repr.
+                loss = {"loss_{}".format(type(loss_func).__name__): loss}
+            weight = self.loss_weight[idx]
+            if "loss" in loss:
+                loss_all += loss["loss"] * weight
+            else:
+                # Without an aggregated "loss" entry every component counts
+                # towards the total; otherwise it would be silently dropped.
+                for value in loss.values():
+                    loss_all += value * weight
+            for key, value in loss.items():
+                if key != "loss":
+                    loss_dict["{}_{}".format(key, idx)] = value
+        loss_dict["loss"] = loss_all
+        return loss_dict
--- a/ppocr/losses/distillation_loss.py
+++ b/ppocr/losses/distillation_loss.py
+#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+import paddle
+import paddle.nn as nn
+import numpy as np
+import cv2
+from .rec_ctc_loss import CTCLoss
+from .basic_loss import DMLLoss
+from .basic_loss import DistanceLoss
+from .det_db_loss import DBLoss
+from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss
+def _sum_loss(loss_dict):
+    """Ensure ``loss_dict`` has a ``"loss"`` entry and return the same dict.
+
+    When the key is missing it is set to the sum of all existing values,
+    starting from ``0.``; an existing ``"loss"`` entry is left untouched.
+    """
+    if "loss" not in loss_dict:
+        loss_dict["loss"] = sum(loss_dict.values(), 0.)
+    return loss_dict
+class DistillationDMLLoss(DMLLoss):
+    """DML loss between the outputs of paired sub-models.
+
+    Args:
+        model_name_pairs: one ``[name_a, name_b]`` pair or a list of such
+            pairs; the names index the ``predicts`` dict given to ``forward``.
+        act: activation forwarded to ``DMLLoss``.
+        key: optional key selecting one entry from each model's output.
+        maps_name: ``None``, one map name, or a list of map names out of
+            ``"thrink_maps"``, ``"threshold_maps"`` and ``"binary_maps"``.
+            When set, the loss is computed per selected channel.
+        name: prefix of the keys in the returned loss dict.
+    """
+    def __init__(self,
+                 model_name_pairs=[],
+                 act=None,
+                 key=None,
+                 maps_name=None,
+                 name="dml"):
+        super().__init__(act=act)
+        assert isinstance(model_name_pairs, list)
+        self.key = key
+        self.model_name_pairs = self._check_model_name_pairs(model_name_pairs)
+        self.name = name
+        self.maps_name = self._check_maps_name(maps_name)
+    def _check_model_name_pairs(self, model_name_pairs):
+        """Normalise the argument to a list of pairs."""
+        # An empty list (the default) has no first element to inspect.
+        if not isinstance(model_name_pairs, list) or not model_name_pairs:
+            return []
+        elif isinstance(model_name_pairs[0], list) and isinstance(model_name_pairs[0][0], str):
+            return model_name_pairs
+        else:
+            return [model_name_pairs]
+    def _check_maps_name(self, maps_name):
+        """Normalise the argument to a flat list of map names, or ``None``."""
+        if maps_name is None:
+            return None
+        elif isinstance(maps_name, str):
+            return [maps_name]
+        elif isinstance(maps_name, list):
+            # Keep the list flat: a nested list never matches a name in _slice_out.
+            return list(maps_name)
+        else:
+            return None
+    def _slice_out(self, outs):
+        """Map each known name in ``maps_name`` to its channel of ``outs``."""
+        channel_of = {"thrink_maps": 0, "threshold_maps": 1, "binary_maps": 2}
+        new_outs = {}
+        for k in self.maps_name:
+            if k in channel_of:
+                new_outs[k] = outs[:, channel_of[k], :, :]
+        return new_outs
+    def forward(self, predicts, batch):
+        """Return a dict of per-pair (and per-map) losses plus their ``"loss"`` sum."""
+        loss_dict = dict()
+        for idx, pair in enumerate(self.model_name_pairs):
+            out1 = predicts[pair[0]]
+            out2 = predicts[pair[1]]
+            if self.key is not None:
+                out1 = out1[self.key]
+                out2 = out2[self.key]
+            if self.maps_name is None:
+                loss = super().forward(out1, out2)
+                if isinstance(loss, dict):
+                    for key in loss:
+                        loss_dict["{}_{}_{}_{}".format(key, pair[0], pair[1],
+                                                       idx)] = loss[key]
+                else:
+                    loss_dict["{}_{}".format(self.name, idx)] = loss
+            else:
+                outs1 = self._slice_out(out1)
+                outs2 = self._slice_out(out2)
+                for k in outs1:
+                    loss = super().forward(outs1[k], outs2[k])
+                    if isinstance(loss, dict):
+                        for key in loss:
+                            loss_dict["{}_{}_{}_{}_{}".format(
+                                key, pair[0], pair[1], k, idx)] = loss[key]
+                    else:
+                        # Name by the map key itself; an index into maps_name
+                        # is wrong once an unknown name has been skipped.
+                        loss_dict["{}_{}_{}".format(self.name, k, idx)] = loss
+        loss_dict = _sum_loss(loss_dict)
+        return loss_dict
+class DistillationCTCLoss(CTCLoss):
+    """CTC loss evaluated on the outputs of several named sub-models.
+
+    Args:
+        model_name_list: names indexing the ``predicts`` dict.
+        key: optional key selecting one entry from each model's output.
+        name: prefix of the keys in the returned loss dict.
+    """
+    def __init__(self, model_name_list=[], key=None, name="loss_ctc"):
+        super().__init__()
+        self.model_name_list = model_name_list
+        self.key = key
+        self.name = name
+    def forward(self, predicts, batch):
+        """Return a dict with one CTC loss entry per listed model."""
+        collected = dict()
+        for idx, model_name in enumerate(self.model_name_list):
+            out = predicts[model_name]
+            if self.key is not None:
+                out = out[self.key]
+            loss = super().forward(out, batch)
+            if not isinstance(loss, dict):
+                collected["{}_{}".format(self.name, model_name)] = loss
+                continue
+            # Every entry of the parent result is stored under the same name.
+            entry = "{}_{}_{}".format(self.name, model_name, idx)
+            for value in loss.values():
+                collected[entry] = value
+        return collected
+class DistillationDBLoss(DBLoss):
+    """DB loss evaluated on the outputs of several named sub-models.
+
+    Args:
+        model_name_list: names indexing the ``predicts`` dict.
+        name: prefix of the keys in the returned loss dict.
+    """
+    def __init__(self,
+                 model_name_list=[],
+                 balance_loss=True,
+                 main_loss_type='DiceLoss',
+                 alpha=5,
+                 beta=10,
+                 ohem_ratio=3,
+                 eps=1e-6,
+                 name="db",
+                 **kwargs):
+        # NOTE(review): balance_loss, main_loss_type, alpha, beta, ohem_ratio
+        # and eps are accepted but not forwarded to the parent constructor,
+        # so the parent's own defaults apply - confirm this is intended.
+        super().__init__()
+        self.model_name_list = model_name_list
+        self.name = name
+        self.key = None
+    def forward(self, predicts, batch):
+        """Return per-model loss components plus their ``"loss"`` sum."""
+        loss_dict = {}
+        for model_name in self.model_name_list:
+            out = predicts[model_name]
+            if self.key is not None:
+                out = out[self.key]
+            loss = super().forward(out, batch)
+            if not isinstance(loss, dict):
+                loss_dict["{}_{}".format(self.name, model_name)] = loss
+                continue
+            for key, value in loss.items():
+                # The parent's aggregated entry is dropped; _sum_loss rebuilds it.
+                if key != "loss":
+                    loss_dict["{}_{}_{}".format(self.name, model_name,
+                                                key)] = value
+        return _sum_loss(loss_dict)
+class DistillationDilaDBLoss(DBLoss):
+    """Distillation loss of a student against the dilated teacher shrink map.
+
+    Args:
+        model_name_pairs: list of ``[student_name, teacher_name]`` pairs
+            indexing the ``predicts`` dict.
+        key: optional key selecting one entry from each model's output; when
+            ``None`` the model outputs are used directly.
+        name: prefix of the keys in the returned loss dict.
+    """
+    def __init__(self,
+                 model_name_pairs=[],
+                 key=None,
+                 balance_loss=True,
+                 main_loss_type='DiceLoss',
+                 alpha=5,
+                 beta=10,
+                 ohem_ratio=3,
+                 eps=1e-6,
+                 name="dila_dbloss"):
+        # NOTE(review): the DB hyper-parameters accepted here are not passed
+        # to the parent constructor - confirm this is intended.
+        super().__init__()
+        self.model_name_pairs = model_name_pairs
+        self.name = name
+        self.key = key
+    def forward(self, predicts, batch):
+        """Return per-pair losses plus their ``"loss"`` sum."""
+        loss_dict = dict()
+        # The 2x2 dilation kernel is the same for every pair.
+        dilation_w = np.array([[1, 1], [1, 1]])
+        for idx, pair in enumerate(self.model_name_pairs):
+            stu_preds = predicts[pair[0]]
+            tch_preds = predicts[pair[1]]
+            # With key=None the raw outputs are used; they were left unbound before.
+            if self.key is not None:
+                stu_preds = stu_preds[self.key]
+                tch_preds = tch_preds[self.key]
+            stu_shrink_maps = stu_preds[:, 0, :, :]
+            stu_binary_maps = stu_preds[:, 2, :, :]
+            # Binarise the teacher shrink map at 0.3, then dilate each sample.
+            th_shrink_maps = tch_preds[:, 0, :, :]
+            th_shrink_maps = th_shrink_maps.numpy() > 0.3
+            dilate_maps = np.zeros_like(th_shrink_maps).astype(np.float32)
+            for i in range(th_shrink_maps.shape[0]):
+                dilate_maps[i] = cv2.dilate(
+                    th_shrink_maps[i, :, :].astype(np.uint8), dilation_w)
+            th_shrink_maps = paddle.to_tensor(dilate_maps)
+            # Only the shrink mask is used, but the unpack still requires
+            # exactly four label tensors after the first batch entry.
+            _, _, _, label_shrink_mask = batch[1:]
+            # Shrink-map loss against the dilated teacher map.
+            bce_loss = self.alpha * self.bce_loss(
+                stu_shrink_maps, th_shrink_maps, label_shrink_mask)
+            loss_binary_maps = self.dice_loss(stu_binary_maps, th_shrink_maps,
+                                              label_shrink_mask)
+            k = "{}_{}_{}".format(self.name, pair[0], pair[1])
+            loss_dict[k] = bce_loss + loss_binary_maps
+        loss_dict = _sum_loss(loss_dict)
+        return loss_dict
+class DistillationDistanceLoss(DistanceLoss):
+    """Distance loss between the outputs of paired sub-models.
+
+    Args:
+        mode: distance mode forwarded to ``DistanceLoss``.
+        model_name_pairs: list of ``[name_a, name_b]`` pairs indexing the
+            ``predicts`` dict.
+        key: optional key selecting one entry from each model's output.
+        name: prefix of the keys in the returned loss dict.
+        **kargs: forwarded to ``DistanceLoss``.
+    """
+    def __init__(self,
+                 mode="l2",
+                 model_name_pairs=[],
+                 key=None,
+                 name="loss_distance",
+                 **kargs):
+        super().__init__(mode=mode, **kargs)
+        assert isinstance(model_name_pairs, list)
+        self.key = key
+        self.model_name_pairs = model_name_pairs
+        # The "_l2" suffix is appended whatever the selected mode is.
+        self.name = name + "_l2"
+    def forward(self, predicts, batch):
+        """Return a dict with one distance loss entry per model pair."""
+        result = dict()
+        for idx, pair in enumerate(self.model_name_pairs):
+            first, second = pair[0], pair[1]
+            out1, out2 = predicts[first], predicts[second]
+            if self.key is not None:
+                out1, out2 = out1[self.key], out2[self.key]
+            loss = super().forward(out1, out2)
+            if isinstance(loss, dict):
+                for key, value in loss.items():
+                    result["{}_{}_{}".format(self.name, key, idx)] = value
+            else:
+                result["{}_{}_{}_{}".format(self.name, first, second,
+                                            idx)] = loss
+        return result
--- a/ppocr/losses/e2e_pg_loss.py
+++ b/ppocr/losses/e2e_pg_loss.py
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from paddle import nn
+import paddle
+from .det_basic_loss import DiceLoss
+from ppocr.utils.e2e_utils.extract_batchsize import pre_process
+class PGLoss(nn.Layer):
+    """End-to-end loss combining score, border, direction and CTC terms.
+
+    ``forward`` returns ``score_loss + border_loss + direction_loss +
+    5 * ctc_loss`` together with the individual terms.
+    """
+    def __init__(self,
+                 tcl_bs,
+                 max_text_length,
+                 max_text_nums,
+                 pad_num,
+                 eps=1e-6,
+                 **kwargs):
+        """Store the settings handed to ``pre_process`` and build the dice loss.
+
+        ``pad_num`` is also used as the CTC blank index; ``eps`` is passed to
+        ``DiceLoss``. Extra keyword arguments are ignored.
+        """
+        super(PGLoss, self).__init__()
+        self.tcl_bs = tcl_bs
+        self.max_text_nums = max_text_nums
+        self.max_text_length = max_text_length
+        self.pad_num = pad_num
+        self.dice_loss = DiceLoss(eps=eps)
+    def border_loss(self, f_border, l_border, l_score, l_mask):
+        """Smooth-L1 style loss between predicted and labelled border maps.
+
+        ``l_border`` is split along axis 1 into 4 target channels and 1
+        normalisation channel. The result is weighted by the normalisation,
+        score and mask maps and divided by the summed score * mask.
+        """
+        l_border_split, l_border_norm = paddle.tensor.split(
+            l_border, num_or_sections=[4, 1], axis=1)
+        f_border_split = f_border
+        # Expand each weighting map along the channel axis to 4 * c channels.
+        b, c, h, w = l_border_norm.shape
+        l_border_norm_split = paddle.expand(
+            x=l_border_norm, shape=[b, 4 * c, h, w])
+        b, c, h, w = l_score.shape
+        l_border_score = paddle.expand(x=l_score, shape=[b, 4 * c, h, w])
+        b, c, h, w = l_mask.shape
+        l_border_mask = paddle.expand(x=l_mask, shape=[b, 4 * c, h, w])
+        border_diff = l_border_split - f_border_split
+        abs_border_diff = paddle.abs(border_diff)
+        # 1.0 where |diff| < 1 (quadratic branch), 0.0 elsewhere (linear branch).
+        border_sign = abs_border_diff < 1.0
+        border_sign = paddle.cast(border_sign, dtype='float32')
+        border_sign.stop_gradient = True
+        border_in_loss = 0.5 * abs_border_diff * abs_border_diff * border_sign + \
+                         (abs_border_diff - 0.5) * (1.0 - border_sign)
+        border_out_loss = l_border_norm_split * border_in_loss
+        # The 1e-5 keeps the division defined when score * mask sums to zero.
+        border_loss = paddle.sum(border_out_loss * l_border_score * l_border_mask) / \
+                      (paddle.sum(l_border_score * l_border_mask) + 1e-5)
+        return border_loss
+    def direction_loss(self, f_direction, l_direction, l_score, l_mask):
+        """Smooth-L1 style loss between predicted and labelled direction maps.
+
+        Same computation as ``border_loss`` with 2 target channels in place
+        of 4.
+        """
+        l_direction_split, l_direction_norm = paddle.tensor.split(
+            l_direction, num_or_sections=[2, 1], axis=1)
+        f_direction_split = f_direction
+        # Expand each weighting map along the channel axis to 2 * c channels.
+        b, c, h, w = l_direction_norm.shape
+        l_direction_norm_split = paddle.expand(
+            x=l_direction_norm, shape=[b, 2 * c, h, w])
+        b, c, h, w = l_score.shape
+        l_direction_score = paddle.expand(x=l_score, shape=[b, 2 * c, h, w])
+        b, c, h, w = l_mask.shape
+        l_direction_mask = paddle.expand(x=l_mask, shape=[b, 2 * c, h, w])
+        direction_diff = l_direction_split - f_direction_split
+        abs_direction_diff = paddle.abs(direction_diff)
+        # 1.0 where |diff| < 1 (quadratic branch), 0.0 elsewhere (linear branch).
+        direction_sign = abs_direction_diff < 1.0
+        direction_sign = paddle.cast(direction_sign, dtype='float32')
+        direction_sign.stop_gradient = True
+        direction_in_loss = 0.5 * abs_direction_diff * abs_direction_diff * direction_sign + \
+                            (abs_direction_diff - 0.5) * (1.0 - direction_sign)
+        direction_out_loss = l_direction_norm_split * direction_in_loss
+        direction_loss = paddle.sum(direction_out_loss * l_direction_score * l_direction_mask) / \
+                         (paddle.sum(l_direction_score * l_direction_mask) + 1e-5)
+        return direction_loss
+    def ctcloss(self, f_char, tcl_pos, tcl_mask, tcl_label, label_t):
+        """CTC loss on character features gathered at the given positions.
+
+        The gathered features are reshaped to ``[-1, 64, 37]`` and split into
+        36 foreground classes and 1 background class, so this method is tied
+        to those hard-coded sizes. The mean of the per-sample CTC costs is
+        returned.
+        """
+        # Move channels last so gather_nd picks a feature vector per position.
+        f_char = paddle.transpose(f_char, [0, 2, 3, 1])
+        tcl_pos = paddle.reshape(tcl_pos, [-1, 3])
+        tcl_pos = paddle.cast(tcl_pos, dtype=int)
+        f_tcl_char = paddle.gather_nd(f_char, tcl_pos)
+        f_tcl_char = paddle.reshape(f_tcl_char,
+                                    [-1, 64, 37])  # len(Lexicon_Table)+1
+        f_tcl_char_fg, f_tcl_char_bg = paddle.split(f_tcl_char, [36, 1], axis=2)
+        # Where the mask is 0 the background logit is set to 20 ...
+        f_tcl_char_bg = f_tcl_char_bg * tcl_mask + (1.0 - tcl_mask) * 20.0
+        b, c, l = tcl_mask.shape
+        tcl_mask_fg = paddle.expand(x=tcl_mask, shape=[b, c, 36 * l])
+        tcl_mask_fg.stop_gradient = True
+        # ... and every foreground logit is set to -20.
+        f_tcl_char_fg = f_tcl_char_fg * tcl_mask_fg + (1.0 - tcl_mask_fg) * (
+            -20.0)
+        f_tcl_char_mask = paddle.concat([f_tcl_char_fg, f_tcl_char_bg], axis=2)
+        # Swap the first two axes before handing the tensor to ctc_loss.
+        f_tcl_char_ld = paddle.transpose(f_tcl_char_mask, (1, 0, 2))
+        N, B, _ = f_tcl_char_ld.shape
+        # Every sequence is given the full length N.
+        input_lengths = paddle.to_tensor([N] * B, dtype='int64')
+        cost = paddle.nn.functional.ctc_loss(
+            log_probs=f_tcl_char_ld,
+            labels=tcl_label,
+            input_lengths=input_lengths,
+            label_lengths=label_t,
+            blank=self.pad_num,
+            reduction='none')
+        cost = cost.mean()
+        return cost
+    def forward(self, predicts, labels):
+        """Compute the total loss and its four components.
+
+        Args:
+            predicts: dict with the keys ``'f_score'``, ``'f_border'``,
+                ``'f_direction'`` and ``'f_char'``.
+            labels: sequence of exactly nine items, unpacked below.
+
+        Returns:
+            dict: ``'loss'`` plus ``score_loss``, ``border_loss``,
+            ``direction_loss`` and ``ctc_loss``.
+        """
+        images, tcl_maps, tcl_label_maps, border_maps \
+            , direction_maps, training_masks, label_list, pos_list, pos_mask = labels
+        # for all the batch_size
+        pos_list, pos_mask, label_list, label_t = pre_process(
+            label_list, pos_list, pos_mask, self.max_text_length,
+            self.max_text_nums, self.pad_num, self.tcl_bs)
+        f_score, f_border, f_direction, f_char = predicts['f_score'], predicts['f_border'], predicts['f_direction'], \
+                                                 predicts['f_char']
+        score_loss = self.dice_loss(f_score, tcl_maps, training_masks)
+        border_loss = self.border_loss(f_border, border_maps, tcl_maps,
+                                       training_masks)
+        direction_loss = self.direction_loss(f_direction, direction_maps,
+                                             tcl_maps, training_masks)
+        ctc_loss = self.ctcloss(f_char, pos_list, pos_mask, label_list, label_t)
+        # The CTC term is weighted 5x relative to the three map losses.
+        loss_all = score_loss + border_loss + direction_loss + 5 * ctc_loss
+        losses = {
+            'loss': loss_all,
+            "score_loss": score_loss,
+            "border_loss": border_loss,
+            "direction_loss": direction_loss,
+            "ctc_loss": ctc_loss
+        }
+        return losses
--- a/ppocr/losses/rec_ctc_loss.py
+++ b/ppocr/losses/rec_ctc_loss.py
@@ -25,7 +25,7 @@ class CTCLoss(nn.Layer):
        super(CTCLoss, self).__init__()
        self.loss_func = nn.CTCLoss(blank=0, reduction='none')
-    def __call__(self, predicts, batch):
+    def forward(self, predicts, batch):
        predicts = predicts.transpose((1, 0, 2))
        N, B, _ = predicts.shape
        preds_lengths = paddle.to_tensor([N] * B, dtype='int64')

--- a/ppocr/losses/rec_nrtr_loss.py
+++ b/ppocr/losses/rec_nrtr_loss.py
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+class NRTRLoss(nn.Layer):
+    """Cross-entropy loss for NRTR with optional label smoothing.
+
+    Args:
+        smoothing: when true, use a smoothed one-hot target (eps = 0.1) and
+            average over targets that are not 0; otherwise use
+            ``nn.CrossEntropyLoss`` with ``ignore_index=0``.
+    """
+    def __init__(self, smoothing=True, **kwargs):
+        super(NRTRLoss, self).__init__()
+        self.loss_func = nn.CrossEntropyLoss(reduction='mean', ignore_index=0)
+        self.smoothing = smoothing
+    def forward(self, pred, batch):
+        """Return ``{'loss': loss}`` for ``pred`` against the labels in ``batch``.
+
+        ``batch[1]`` holds the targets and ``batch[2]`` the lengths; the
+        targets are cut to columns ``1 .. 1 + max_len`` and flattened.
+        """
+        logits = pred.reshape([-1, pred.shape[2]])
+        max_len = batch[2].max()
+        tgt = batch[1][:, 1:2 + max_len].reshape([-1])
+        if not self.smoothing:
+            return {'loss': self.loss_func(logits, tgt)}
+        eps = 0.1
+        n_class = logits.shape[1]
+        one_hot = F.one_hot(tgt, n_class)
+        # Put 1 - eps on the target class and share eps among the others.
+        one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1)
+        log_prb = F.log_softmax(logits, axis=1)
+        keep = paddle.not_equal(tgt, paddle.zeros(tgt.shape, dtype='int64'))
+        per_token = -(one_hot * log_prb).sum(axis=1)
+        loss = per_token.masked_select(keep).mean()
+        return {'loss': loss}
--- a/ppocr/losses/table_att_loss.py
+++ b/ppocr/losses/table_att_loss.py
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle
+from paddle import nn
+from paddle.nn import functional as F
+from paddle import fluid
+class TableAttentionLoss(nn.Layer):
+    """Loss for table recognition: structure cross-entropy plus location terms.
+
+    The total is ``structure_loss + loc_loss``, with an extra GIoU term when
+    ``use_giou`` is set.
+    """
+    def __init__(self, structure_weight, loc_weight, use_giou=False, giou_weight=1.0, **kwargs):
+        """Store the weights of the individual terms; extra kwargs are ignored."""
+        super(TableAttentionLoss, self).__init__()
+        # Per-element losses are kept so forward can mask them before averaging.
+        self.loss_func = nn.CrossEntropyLoss(weight=None, reduction='none')
+        self.structure_weight = structure_weight
+        self.loc_weight = loc_weight
+        self.use_giou = use_giou
+        self.giou_weight = giou_weight
+    def giou_loss(self, preds, bbox, eps=1e-7, reduction='mean'):
+        '''
+        Compute ``1 - GIoU`` between predicted and target boxes.
+        :param preds:[[x1,y1,x2,y2], [x1,y1,x2,y2],,,]
+        :param bbox:[[x1,y1,x2,y2], [x1,y1,x2,y2],,,]
+        :param eps: small constant added to the union and the enclosing area
+        :param reduction: 'mean' or 'sum'; anything else raises NotImplementedError
+        :return: loss
+        '''
+        # Corners of the intersection rectangle.
+        ix1 = fluid.layers.elementwise_max(preds[:, 0], bbox[:, 0])
+        iy1 = fluid.layers.elementwise_max(preds[:, 1], bbox[:, 1])
+        ix2 = fluid.layers.elementwise_min(preds[:, 2], bbox[:, 2])
+        iy2 = fluid.layers.elementwise_min(preds[:, 3], bbox[:, 3])
+        # Widths and heights get a 1e-3 offset and are clipped at 0.
+        iw = fluid.layers.clip(ix2 - ix1 + 1e-3, 0., 1e10)
+        ih = fluid.layers.clip(iy2 - iy1 + 1e-3, 0., 1e10)
+        # overlap
+        inters = iw * ih
+        # union
+        uni = (preds[:, 2] - preds[:, 0] + 1e-3) * (preds[:, 3] - preds[:, 1] + 1e-3
+            ) + (bbox[:, 2] - bbox[:, 0] + 1e-3) * (
+            bbox[:, 3] - bbox[:, 1] + 1e-3) - inters + eps
+        # ious
+        ious = inters / uni
+        # Corners of the smallest rectangle enclosing both boxes.
+        ex1 = fluid.layers.elementwise_min(preds[:, 0], bbox[:, 0])
+        ey1 = fluid.layers.elementwise_min(preds[:, 1], bbox[:, 1])
+        ex2 = fluid.layers.elementwise_max(preds[:, 2], bbox[:, 2])
+        ey2 = fluid.layers.elementwise_max(preds[:, 3], bbox[:, 3])
+        ew = fluid.layers.clip(ex2 - ex1 + 1e-3, 0., 1e10)
+        eh = fluid.layers.clip(ey2 - ey1 + 1e-3, 0., 1e10)
+        # enclosing area
+        enclose = ew * eh + eps
+        giou = ious - (enclose - uni) / enclose
+        loss = 1 - giou
+        if reduction == 'mean':
+            loss = paddle.mean(loss)
+        elif reduction == 'sum':
+            loss = paddle.sum(loss)
+        else:
+            raise NotImplementedError
+        return loss
+    def forward(self, predicts, batch):
+        """Compute the structure and location losses.
+
+        Args:
+            predicts: dict with the keys ``'structure_probs'`` and ``'loc_preds'``.
+            batch: sequence where index 1 holds the structure targets, index 2
+                the location targets, index 4 the location mask and, when the
+                batch has six entries, index 5 the structure mask.
+
+        Returns:
+            dict: ``'loss'``, ``structure_loss``, ``loc_loss`` and, when
+            ``use_giou`` is set, ``loc_loss_giou``.
+        """
+        structure_probs = predicts['structure_probs']
+        structure_targets = batch[1].astype("int64")
+        # The first position along axis 1 is dropped from targets and masks.
+        structure_targets = structure_targets[:, 1:]
+        if len(batch) == 6:
+            structure_mask = batch[5].astype("int64")
+            structure_mask = structure_mask[:, 1:]
+            structure_mask = paddle.reshape(structure_mask, [-1])
+        # Flatten so each position is scored as an independent classification.
+        structure_probs = paddle.reshape(structure_probs, [-1, structure_probs.shape[-1]])
+        structure_targets = paddle.reshape(structure_targets, [-1])
+        structure_loss = self.loss_func(structure_probs, structure_targets)
+        if len(batch) == 6:
+             structure_loss = structure_loss * structure_mask
+#         structure_loss = paddle.sum(structure_loss) * self.structure_weight
+        # The mean runs over all positions, including those zeroed by the mask.
+        structure_loss = paddle.mean(structure_loss) * self.structure_weight
+        loc_preds = predicts['loc_preds']
+        loc_targets = batch[2].astype("float32")
+        loc_targets_mask = batch[4].astype("float32")
+        loc_targets = loc_targets[:, 1:, :]
+        loc_targets_mask = loc_targets_mask[:, 1:, :]
+        # Only the predictions are masked here; the targets are used as given.
+        loc_loss = F.mse_loss(loc_preds * loc_targets_mask, loc_targets) * self.loc_weight
+        if self.use_giou:
+            loc_loss_giou = self.giou_loss(loc_preds * loc_targets_mask, loc_targets) * self.giou_weight
+            total_loss = structure_loss + loc_loss + loc_loss_giou
+            return {'loss':total_loss, "structure_loss":structure_loss, "loc_loss":loc_loss, "loc_loss_giou":loc_loss_giou}
+        else:
+            total_loss = structure_loss + loc_loss
+            return {'loss':total_loss, "structure_loss":structure_loss, "loc_loss":loc_loss}
\ No newline at end of file
--- a/ppocr/metrics/__init__.py
+++ b/ppocr/metrics/__init__.py
@@ -19,19 +19,23 @@ from __future__ import unicode_literals
 import copy
-__all__ = ['build_metric']
+__all__ = ["build_metric"]
+from .det_metric import DetMetric
+from .rec_metric import RecMetric
+from .cls_metric import ClsMetric
+from .e2e_metric import E2EMetric
+from .distillation_metric import DistillationMetric
+from .table_metric import TableMetric
 def build_metric(config):
+    """Build the metric object selected by ``config["name"]``.
+
+    ``config`` is deep-copied, its ``name`` entry is popped and checked
+    against the list of supported class names, and the remaining entries
+    are passed to that class as keyword arguments.
+    """
-    from .det_metric import DetMetric
+    support_dict = [
-    from .rec_metric import RecMetric
+        "DetMetric", "RecMetric", "ClsMetric", "E2EMetric", "DistillationMetric", "TableMetric"
-    from .cls_metric import ClsMetric
+    ]
-    support_dict = ['DetMetric', 'RecMetric', 'ClsMetric']
    config = copy.deepcopy(config)
-    module_name = config.pop('name')
+    module_name = config.pop("name")
    assert module_name in support_dict, Exception(
-        'metric only support {}'.format(support_dict))
+        "metric only support {}".format(support_dict))
+    # NOTE(review): eval() is reached only with a name that passed the
+    # membership check above, but that check is an assert and is skipped
+    # when Python runs with -O.
    module_class = eval(module_name)(**config)
    return module_class
--- a/ppocr/metrics/det_metric.py
+++ b/ppocr/metrics/det_metric.py
@@ -55,6 +55,7 @@ class DetMetric(object):
            result = self.evaluator.evaluate_image(gt_info_list, det_info_list)
            self.results.append(result)
    def get_metric(self):
        """
        return metrics {

--- a/ppocr/metrics/distillation_metric.py
+++ b/ppocr/metrics/distillation_metric.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import importlib
+import copy
+from .rec_metric import RecMetric
+from .det_metric import DetMetric
+from .e2e_metric import E2EMetric
+from .cls_metric import ClsMetric
+class DistillationMetric(object):
+    """Run one base metric per prediction branch of a distillation model.
+
+    ``__call__`` expects ``preds`` to be a dict. On the first call a
+    separate instance of the metric class named by ``base_metric_name`` is
+    created for every key of ``preds``; each instance is then fed the
+    predictions stored under its key. In ``get_metric`` the results of the
+    branch named by ``key`` keep their original names, while the results
+    of every other branch are reported as ``"<branch>_<name>"``.
+
+    Args:
+        key: name of the branch whose results are reported unprefixed.
+        base_metric_name: name of a metric class available in this module.
+        main_indicator: forwarded to every base metric instance.
+        **kwargs: extra keyword arguments forwarded to every base metric.
+    """
+    def __init__(self,
+                 key=None,
+                 base_metric_name=None,
+                 main_indicator=None,
+                 **kwargs):
+        self.key = key
+        self.main_indicator = main_indicator
+        self.base_metric_name = base_metric_name
+        self.kwargs = kwargs
+        # Built lazily on the first __call__, once the branch names are known.
+        self.metrics = None
+    def _init_metrcis(self, preds):
+        """Create and reset one base metric instance per key of ``preds``."""
+        self.metrics = dict()
+        # The base metric class is looked up by name in this module.
+        mod = importlib.import_module(__name__)
+        for key in preds:
+            self.metrics[key] = getattr(mod, self.base_metric_name)(
+                main_indicator=self.main_indicator, **self.kwargs)
+            self.metrics[key].reset()
+    def __call__(self, preds, batch, **kwargs):
+        """Feed each branch's predictions to the metric created for it."""
+        assert isinstance(preds, dict)
+        if self.metrics is None:
+            self._init_metrcis(preds)
+        for key in preds:
+            self.metrics[key].__call__(preds[key], batch, **kwargs)
+    def get_metric(self):
+        """
+        return metrics {
+                 'acc': 0,
+                 'norm_edit_dis': 0,
+            }
+
+        Results of the branch named by ``self.key`` keep their names; those
+        of other branches are prefixed with ``"<branch>_"``. An empty dict
+        is returned when no batch has been seen yet.
+        """
+        output = dict()
+        if self.metrics is None:
+            # Nothing was evaluated, so there is nothing to report.
+            return output
+        for key, base_metric in self.metrics.items():
+            metric = base_metric.get_metric()
+            # main indicator
+            if key == self.key:
+                output.update(metric)
+            else:
+                for sub_key in metric:
+                    output["{}_{}".format(key, sub_key)] = metric[sub_key]
+        return output
+    def reset(self):
+        """Reset every base metric; a no-op before the first ``__call__``."""
+        if self.metrics is None:
+            return
+        for key in self.metrics:
+            self.metrics[key].reset()
--- a/ppocr/metrics/e2e_metric.py
+++ b/ppocr/metrics/e2e_metric.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+__all__ = ['E2EMetric']
+from ppocr.utils.e2e_metric.Deteval import get_socre_A, get_socre_B, combine_results
+from ppocr.utils.e2e_utils.extract_textpoint_slow import get_dict
+class E2EMetric(object):
+    def __init__(self,
+                 mode,
+                 gt_mat_dir,
+                 character_dict_path,
+                 main_indicator='f_score_e2e',
+                 **kwargs):
+        """Store the evaluation settings and load the character list.
+
+        Args:
+            mode: ``'A'`` makes ``__call__`` take ground truth from the
+                batch; any other value makes it use ``gt_mat_dir``.
+            gt_mat_dir: passed to ``get_socre_B`` in the non-``'A'`` mode.
+            character_dict_path: passed to ``get_dict`` to load the labels.
+            main_indicator: stored on the instance as given.
+            **kwargs: accepted and ignored.
+        """
+        labels = get_dict(character_dict_path)
+        self.mode = mode
+        self.gt_mat_dir = gt_mat_dir
+        self.label_list = labels
+        # Label indices at or above this bound are skipped when decoding.
+        self.max_index = len(labels)
+        self.main_indicator = main_indicator
+        self.reset()
+    def __call__(self, preds, batch, **kwargs):
+        """Score one batch of end-to-end predictions and store the result.
+
+        In mode ``'A'`` the ground truth is taken from the batch: polygons
+        from ``batch[2]``, label indices from ``batch[3][0]`` and ignore
+        flags from ``batch[4]``. In any other mode the image id
+        ``batch[5][0]`` is passed to ``get_socre_B`` together with
+        ``self.gt_mat_dir``. Each result is appended to ``self.results``.
+        """
+        if self.mode != 'A':
+            img_id = batch[5][0]
+            detections = [{
+                'points': polygon,
+                'texts': text
+            } for polygon, text in zip(preds['points'], preds['texts'])]
+            self.results.append(
+                get_socre_B(self.gt_mat_dir, img_id, detections))
+            return
+        gt_polygons_batch = batch[2]
+        encoded_texts = batch[3][0]
+        ignore_tags_batch = batch[4]
+        # Decode label indices to strings, skipping out-of-range indices.
+        decoded_texts = [
+            "".join(self.label_list[index] for index in encoded
+                    if index < self.max_index) for encoded in encoded_texts
+        ]
+        # preds and decoded_texts are wrapped in one-element lists, so this
+        # zip yields at most one item.
+        samples = zip([preds], gt_polygons_batch, [decoded_texts],
+                      ignore_tags_batch)
+        for pred, gt_polygons, gt_texts, ignore_tags in samples:
+            # prepare gt
+            gt_info_list = [{
+                'points': polygon,
+                'text': text,
+                'ignore': ignore
+            } for polygon, text, ignore in
+                            zip(gt_polygons, gt_texts, ignore_tags)]
+            # prepare det
+            e2e_info_list = [{
+                'points': polygon,
+                'texts': text
+            } for polygon, text in zip(pred['points'], pred['texts'])]
+            self.results.append(get_socre_A(gt_info_list, e2e_info_list))
+    def get_metric(self):
+        """Combine the stored results, clear them, and return the summary."""
+        combined = combine_results(self.results)
+        self.reset()
+        return combined
+    def reset(self):
+        """Discard the results accumulated so far."""
+        self.results = []  # clear results