Unverified Commit 191c9dee authored by Evezerest, committed by GitHub

Merge branch 'dygraph' into dygraph

parents 3c6d5512 8def6786
@@ -11,6 +11,7 @@
- [2.1 Data Augmentation](#数据增强)
- [2.2 General Model Training](#通用模型训练)
- [2.3 Multi-language Model Training](#多语言模型训练)
- [2.4 Knowledge Distillation Training](#知识蒸馏训练)
- [3 Evaluation](#评估)
- [4 Prediction](#预测)
- [5 Convert to Inference Model and Test](#Inference)
@@ -368,6 +369,13 @@ Eval:
label_file_list: ["./train_data/french_val.txt"]
...
```
<a name="知识蒸馏训练"></a>
### 2.4 Knowledge Distillation Training
PaddleOCR supports text recognition model training based on knowledge distillation. For more details, please refer to the [knowledge distillation documentation](./knowledge_distillation.md).
<a name="评估"></a> <a name="评估"></a>
## 3 评估 ## 3 评估
......
@@ -9,6 +9,7 @@ This section uses the icdar2015 dataset as an example to introduce the training,
* [2.1 Start Training](#21-start-training)
* [2.2 Load Trained Model and Continue Training](#22-load-trained-model-and-continue-training)
* [2.3 Training with New Backbone](#23-training-with-new-backbone)
* [2.4 Training with Knowledge Distillation](#24)
- [3. Evaluation and Test](#3-evaluation-and-test)
* [3.1 Evaluation](#31-evaluation)
* [3.2 Test](#32-test)
@@ -174,6 +175,11 @@ After adding the four-part modules of the network, you only need to configure th
**NOTE**: More details about replacing the backbone and other modules can be found in the [doc](add_new_algorithm_en.md).
### 2.4 Training with Knowledge Distillation
PaddleOCR supports knowledge distillation for the text detection training process. For more details, please refer to the [doc](./knowledge_distillation_en.md).
## 3. Evaluation and Test
### 3.1 Evaluation
...
@@ -94,6 +94,8 @@ For more supported languages, please refer to: [Multi-language model](./multi_l
## 4. Paddle-Lite Model
|Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch|
|---|---|---|---|---|---|---|
|PP-OCRv2|extra-lightweight Chinese OCR optimized model|11M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10|
|PP-OCRv2(slim)|extra-lightweight Chinese OCR optimized model|4.6M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10|
|PP-OCRv2|extra-lightweight Chinese OCR optimized model|11M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer_opt.nb)|v2.9|
|PP-OCRv2(slim)|extra-lightweight Chinese OCR optimized model|4.9M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_opt.nb)|v2.9|
|V2.0|ppocr_v2.0 extra-lightweight Chinese OCR optimized model|7.8M|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_opt.nb)|v2.9|
...
# PaddleOCR Quick Start
[PaddleOCR Quick Start](#paddleocr-quick-start)
+ [1. Install PaddleOCR Whl Package](#1-install-paddleocr-whl-package)
* [2. Easy-to-Use](#2-easy-to-use)
+ [2.1 Use by Command Line](#21-use-by-command-line)
...
@@ -10,6 +10,7 @@
- [2.1 Data Augmentation](#Data_Augmentation)
- [2.2 General Training](#Training)
- [2.3 Multi-language Training](#Multi_language)
- [2.4 Training with Knowledge Distillation](#kd)
- [3. Evaluation](#EVALUATION)
@@ -361,6 +362,12 @@ Eval:
...
```
<a name="kd"></a>
### 2.4 Training with Knowledge Distillation
PaddleOCR supports knowledge distillation for the text recognition training process. For more details, please refer to the [doc](./knowledge_distillation_en.md).
<a name="EVALUATION"></a>
## 3. Evaluation
...
@@ -22,7 +22,8 @@ from .make_shrink_map import MakeShrinkMap
from .random_crop_data import EastRandomCropData, RandomCropImgMask
from .make_pse_gt import MakePseGt
from .rec_img_aug import RecAug, RecResizeImg, ClsResizeImg, \
SRNRecResizeImg, NRTRRecResizeImg, SARRecResizeImg, PRENResizeImg
from .randaugment import RandAugment
from .copy_paste import CopyPaste
from .ColorJitter import ColorJitter
@@ -36,6 +37,9 @@ from .gen_table_mask import *
from .vqa import *
from .fce_aug import *
from .fce_targets import FCENetTargets
def transform(data, ops=None):
    """ transform """
...
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/transforms.py
"""
import numpy as np
from PIL import Image, ImageDraw
import cv2
from shapely.geometry import Polygon
import math
from ppocr.utils.poly_nms import poly_intersection
class RandomScaling:
def __init__(self, size=800, scale=(3. / 4, 5. / 2), **kwargs):
"""Random scale the image while keeping aspect.
Args:
size (int) : Base size before scaling.
scale (tuple(float)) : The range of scaling.
"""
assert isinstance(size, int)
assert isinstance(scale, float) or isinstance(scale, tuple)
self.size = size
self.scale = scale if isinstance(scale, tuple) \
else (1 - scale, 1 + scale)
def __call__(self, data):
image = data['image']
text_polys = data['polys']
h, w, _ = image.shape
aspect_ratio = np.random.uniform(min(self.scale), max(self.scale))
scales = self.size * 1.0 / max(h, w) * aspect_ratio
scales = np.array([scales, scales])
out_size = (int(h * scales[1]), int(w * scales[0]))
image = cv2.resize(image, out_size[::-1])
data['image'] = image
text_polys[:, :, 0::2] = text_polys[:, :, 0::2] * scales[1]
text_polys[:, :, 1::2] = text_polys[:, :, 1::2] * scales[0]
data['polys'] = text_polys
return data
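# Usage sketch (illustrative, not part of the original file): RandomScaling
# draws a random factor from `scale`, rescales the longer image side toward
# `size`, and applies the same factors to the polygon coordinates, e.g.:
#
#   data = {'image': np.zeros((100, 200, 3), np.uint8),
#           'polys': np.zeros((1, 4, 2), np.float32)}
#   data = RandomScaling(size=800, scale=(3. / 4, 5. / 2))(data)
#   # data['image'] now has its longer side near 800 * factor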
class RandomCropFlip:
def __init__(self,
pad_ratio=0.1,
crop_ratio=0.5,
iter_num=1,
min_area_ratio=0.2,
**kwargs):
"""Random crop and flip a patch of the image.
Args:
crop_ratio (float): The ratio of cropping.
iter_num (int): Number of operations.
min_area_ratio (float): Minimal area ratio between cropped patch
and original image.
"""
assert isinstance(crop_ratio, float)
assert isinstance(iter_num, int)
assert isinstance(min_area_ratio, float)
self.pad_ratio = pad_ratio
self.epsilon = 1e-2
self.crop_ratio = crop_ratio
self.iter_num = iter_num
self.min_area_ratio = min_area_ratio
def __call__(self, results):
for i in range(self.iter_num):
results = self.random_crop_flip(results)
return results
def random_crop_flip(self, results):
image = results['image']
polygons = results['polys']
ignore_tags = results['ignore_tags']
if len(polygons) == 0:
return results
if np.random.random() >= self.crop_ratio:
return results
h, w, _ = image.shape
area = h * w
pad_h = int(h * self.pad_ratio)
pad_w = int(w * self.pad_ratio)
h_axis, w_axis = self.generate_crop_target(image, polygons, pad_h,
pad_w)
if len(h_axis) == 0 or len(w_axis) == 0:
return results
attempt = 0
while attempt < 50:
attempt += 1
polys_keep = []
polys_new = []
ignore_tags_keep = []
ignore_tags_new = []
xx = np.random.choice(w_axis, size=2)
xmin = np.min(xx) - pad_w
xmax = np.max(xx) - pad_w
xmin = np.clip(xmin, 0, w - 1)
xmax = np.clip(xmax, 0, w - 1)
yy = np.random.choice(h_axis, size=2)
ymin = np.min(yy) - pad_h
ymax = np.max(yy) - pad_h
ymin = np.clip(ymin, 0, h - 1)
ymax = np.clip(ymax, 0, h - 1)
if (xmax - xmin) * (ymax - ymin) < area * self.min_area_ratio:
# area too small
continue
pts = np.stack([[xmin, xmax, xmax, xmin],
[ymin, ymin, ymax, ymax]]).T.astype(np.int32)
pp = Polygon(pts)
fail_flag = False
for polygon, ignore_tag in zip(polygons, ignore_tags):
ppi = Polygon(polygon.reshape(-1, 2))
ppiou, _ = poly_intersection(ppi, pp, buffer=0)
if np.abs(ppiou - float(ppi.area)) > self.epsilon and \
np.abs(ppiou) > self.epsilon:
fail_flag = True
break
elif np.abs(ppiou - float(ppi.area)) < self.epsilon:
polys_new.append(polygon)
ignore_tags_new.append(ignore_tag)
else:
polys_keep.append(polygon)
ignore_tags_keep.append(ignore_tag)
if fail_flag:
continue
else:
break
cropped = image[ymin:ymax, xmin:xmax, :]
select_type = np.random.randint(3)
if select_type == 0:
img = np.ascontiguousarray(cropped[:, ::-1])
elif select_type == 1:
img = np.ascontiguousarray(cropped[::-1, :])
else:
img = np.ascontiguousarray(cropped[::-1, ::-1])
image[ymin:ymax, xmin:xmax, :] = img
        results['image'] = image
if len(polys_new) != 0:
height, width, _ = cropped.shape
if select_type == 0:
for idx, polygon in enumerate(polys_new):
poly = polygon.reshape(-1, 2)
poly[:, 0] = width - poly[:, 0] + 2 * xmin
polys_new[idx] = poly
elif select_type == 1:
for idx, polygon in enumerate(polys_new):
poly = polygon.reshape(-1, 2)
poly[:, 1] = height - poly[:, 1] + 2 * ymin
polys_new[idx] = poly
else:
for idx, polygon in enumerate(polys_new):
poly = polygon.reshape(-1, 2)
poly[:, 0] = width - poly[:, 0] + 2 * xmin
poly[:, 1] = height - poly[:, 1] + 2 * ymin
polys_new[idx] = poly
polygons = polys_keep + polys_new
ignore_tags = ignore_tags_keep + ignore_tags_new
results['polys'] = np.array(polygons)
results['ignore_tags'] = ignore_tags
return results
def generate_crop_target(self, image, all_polys, pad_h, pad_w):
"""Generate crop target and make sure not to crop the polygon
instances.
Args:
image (ndarray): The image waited to be crop.
all_polys (list[list[ndarray]]): All polygons including ground
truth polygons and ground truth ignored polygons.
pad_h (int): Padding length of height.
pad_w (int): Padding length of width.
Returns:
h_axis (ndarray): Vertical cropping range.
w_axis (ndarray): Horizontal cropping range.
"""
h, w, _ = image.shape
h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
text_polys = []
for polygon in all_polys:
rect = cv2.minAreaRect(polygon.astype(np.int32).reshape(-1, 2))
box = cv2.boxPoints(rect)
            box = box.astype(np.int32)
text_polys.append([box[0], box[1], box[2], box[3]])
polys = np.array(text_polys, dtype=np.int32)
for poly in polys:
poly = np.round(poly, decimals=0).astype(np.int32)
minx = np.min(poly[:, 0])
maxx = np.max(poly[:, 0])
w_array[minx + pad_w:maxx + pad_w] = 1
miny = np.min(poly[:, 1])
maxy = np.max(poly[:, 1])
h_array[miny + pad_h:maxy + pad_h] = 1
h_axis = np.where(h_array == 0)[0]
w_axis = np.where(w_array == 0)[0]
return h_axis, w_axis
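    # Reading the masks above: h_array / w_array mark the (padded) rows and
    # columns covered by some text box, so h_axis / w_axis list the coordinates
    # where a crop boundary can be placed without cutting through any instance.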
class RandomCropPolyInstances:
"""Randomly crop images and make sure to contain at least one intact
instance."""
def __init__(self, crop_ratio=5.0 / 8.0, min_side_ratio=0.4, **kwargs):
super().__init__()
self.crop_ratio = crop_ratio
self.min_side_ratio = min_side_ratio
def sample_valid_start_end(self, valid_array, min_len, max_start, min_end):
assert isinstance(min_len, int)
assert len(valid_array) > min_len
start_array = valid_array.copy()
max_start = min(len(start_array) - min_len, max_start)
start_array[max_start:] = 0
start_array[0] = 1
diff_array = np.hstack([0, start_array]) - np.hstack([start_array, 0])
region_starts = np.where(diff_array < 0)[0]
region_ends = np.where(diff_array > 0)[0]
region_ind = np.random.randint(0, len(region_starts))
start = np.random.randint(region_starts[region_ind],
region_ends[region_ind])
end_array = valid_array.copy()
min_end = max(start + min_len, min_end)
end_array[:min_end] = 0
end_array[-1] = 1
diff_array = np.hstack([0, end_array]) - np.hstack([end_array, 0])
region_starts = np.where(diff_array < 0)[0]
region_ends = np.where(diff_array > 0)[0]
region_ind = np.random.randint(0, len(region_starts))
end = np.random.randint(region_starts[region_ind],
region_ends[region_ind])
return start, end
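    # The padded-diff trick above finds runs of consecutive valid positions:
    # in diff_array, a value < 0 marks the start of a run of ones and a value
    # > 0 marks its end, so the start (and then the end) of the crop can be
    # sampled uniformly inside a randomly chosen valid run.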
def sample_crop_box(self, img_size, results):
"""Generate crop box and make sure not to crop the polygon instances.
Args:
img_size (tuple(int)): The image size (h, w).
results (dict): The results dict.
"""
assert isinstance(img_size, tuple)
h, w = img_size[:2]
key_masks = results['polys']
x_valid_array = np.ones(w, dtype=np.int32)
y_valid_array = np.ones(h, dtype=np.int32)
selected_mask = key_masks[np.random.randint(0, len(key_masks))]
selected_mask = selected_mask.reshape((-1, 2)).astype(np.int32)
max_x_start = max(np.min(selected_mask[:, 0]) - 2, 0)
min_x_end = min(np.max(selected_mask[:, 0]) + 3, w - 1)
max_y_start = max(np.min(selected_mask[:, 1]) - 2, 0)
min_y_end = min(np.max(selected_mask[:, 1]) + 3, h - 1)
for mask in key_masks:
mask = mask.reshape((-1, 2)).astype(np.int32)
clip_x = np.clip(mask[:, 0], 0, w - 1)
clip_y = np.clip(mask[:, 1], 0, h - 1)
min_x, max_x = np.min(clip_x), np.max(clip_x)
min_y, max_y = np.min(clip_y), np.max(clip_y)
x_valid_array[min_x - 2:max_x + 3] = 0
y_valid_array[min_y - 2:max_y + 3] = 0
min_w = int(w * self.min_side_ratio)
min_h = int(h * self.min_side_ratio)
x1, x2 = self.sample_valid_start_end(x_valid_array, min_w, max_x_start,
min_x_end)
y1, y2 = self.sample_valid_start_end(y_valid_array, min_h, max_y_start,
min_y_end)
return np.array([x1, y1, x2, y2])
def crop_img(self, img, bbox):
assert img.ndim == 3
h, w, _ = img.shape
assert 0 <= bbox[1] < bbox[3] <= h
assert 0 <= bbox[0] < bbox[2] <= w
return img[bbox[1]:bbox[3], bbox[0]:bbox[2]]
def __call__(self, results):
image = results['image']
polygons = results['polys']
ignore_tags = results['ignore_tags']
if len(polygons) < 1:
return results
if np.random.random_sample() < self.crop_ratio:
crop_box = self.sample_crop_box(image.shape, results)
img = self.crop_img(image, crop_box)
results['image'] = img
# crop and filter masks
x1, y1, x2, y2 = crop_box
w = max(x2 - x1, 1)
h = max(y2 - y1, 1)
polygons[:, :, 0::2] = polygons[:, :, 0::2] - x1
polygons[:, :, 1::2] = polygons[:, :, 1::2] - y1
valid_masks_list = []
valid_tags_list = []
for ind, polygon in enumerate(polygons):
if (polygon[:, ::2] > -4).all() and (
polygon[:, ::2] < w + 4).all() and (
polygon[:, 1::2] > -4).all() and (
polygon[:, 1::2] < h + 4).all():
polygon[:, ::2] = np.clip(polygon[:, ::2], 0, w)
polygon[:, 1::2] = np.clip(polygon[:, 1::2], 0, h)
valid_masks_list.append(polygon)
valid_tags_list.append(ignore_tags[ind])
results['polys'] = np.array(valid_masks_list)
results['ignore_tags'] = valid_tags_list
return results
def __repr__(self):
repr_str = self.__class__.__name__
return repr_str
class RandomRotatePolyInstances:
def __init__(self,
rotate_ratio=0.5,
max_angle=10,
pad_with_fixed_color=False,
pad_value=(0, 0, 0),
**kwargs):
"""Randomly rotate images and polygon masks.
Args:
rotate_ratio (float): The ratio of samples to operate rotation.
max_angle (int): The maximum rotation angle.
pad_with_fixed_color (bool): The flag for whether to pad rotated
image with fixed value. If set to False, the rotated image will
be padded onto cropped image.
pad_value (tuple(int)): The color value for padding rotated image.
"""
self.rotate_ratio = rotate_ratio
self.max_angle = max_angle
self.pad_with_fixed_color = pad_with_fixed_color
self.pad_value = pad_value
def rotate(self, center, points, theta, center_shift=(0, 0)):
# rotate points.
(center_x, center_y) = center
center_y = -center_y
x, y = points[:, ::2], points[:, 1::2]
y = -y
theta = theta / 180 * math.pi
cos = math.cos(theta)
sin = math.sin(theta)
x = (x - center_x)
y = (y - center_y)
_x = center_x + x * cos - y * sin + center_shift[0]
_y = -(center_y + x * sin + y * cos) + center_shift[1]
points[:, ::2], points[:, 1::2] = _x, _y
return points
def cal_canvas_size(self, ori_size, degree):
assert isinstance(ori_size, tuple)
angle = degree * math.pi / 180.0
h, w = ori_size[:2]
cos = math.cos(angle)
sin = math.sin(angle)
canvas_h = int(w * math.fabs(sin) + h * math.fabs(cos))
canvas_w = int(w * math.fabs(cos) + h * math.fabs(sin))
canvas_size = (canvas_h, canvas_w)
return canvas_size
def sample_angle(self, max_angle):
angle = np.random.random_sample() * 2 * max_angle - max_angle
return angle
def rotate_img(self, img, angle, canvas_size):
h, w = img.shape[:2]
rotation_matrix = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
rotation_matrix[0, 2] += int((canvas_size[1] - w) / 2)
rotation_matrix[1, 2] += int((canvas_size[0] - h) / 2)
if self.pad_with_fixed_color:
target_img = cv2.warpAffine(
img,
rotation_matrix, (canvas_size[1], canvas_size[0]),
flags=cv2.INTER_NEAREST,
borderValue=self.pad_value)
else:
mask = np.zeros_like(img)
(h_ind, w_ind) = (np.random.randint(0, h * 7 // 8),
np.random.randint(0, w * 7 // 8))
img_cut = img[h_ind:(h_ind + h // 9), w_ind:(w_ind + w // 9)]
img_cut = cv2.resize(img_cut, (canvas_size[1], canvas_size[0]))
mask = cv2.warpAffine(
mask,
rotation_matrix, (canvas_size[1], canvas_size[0]),
borderValue=[1, 1, 1])
target_img = cv2.warpAffine(
img,
rotation_matrix, (canvas_size[1], canvas_size[0]),
borderValue=[0, 0, 0])
target_img = target_img + img_cut * mask
return target_img
def __call__(self, results):
if np.random.random_sample() < self.rotate_ratio:
image = results['image']
polygons = results['polys']
h, w = image.shape[:2]
angle = self.sample_angle(self.max_angle)
canvas_size = self.cal_canvas_size((h, w), angle)
center_shift = (int((canvas_size[1] - w) / 2), int(
(canvas_size[0] - h) / 2))
image = self.rotate_img(image, angle, canvas_size)
results['image'] = image
# rotate polygons
rotated_masks = []
for mask in polygons:
rotated_mask = self.rotate((w / 2, h / 2), mask, angle,
center_shift)
rotated_masks.append(rotated_mask)
results['polys'] = np.array(rotated_masks)
return results
def __repr__(self):
repr_str = self.__class__.__name__
return repr_str
class SquareResizePad:
def __init__(self,
target_size,
pad_ratio=0.6,
pad_with_fixed_color=False,
pad_value=(0, 0, 0),
**kwargs):
"""Resize or pad images to be square shape.
Args:
target_size (int): The target size of square shaped image.
pad_with_fixed_color (bool): The flag for whether to pad rotated
image with fixed value. If set to False, the rescales image will
be padded onto cropped image.
pad_value (tuple(int)): The color value for padding rotated image.
"""
assert isinstance(target_size, int)
assert isinstance(pad_ratio, float)
assert isinstance(pad_with_fixed_color, bool)
assert isinstance(pad_value, tuple)
self.target_size = target_size
self.pad_ratio = pad_ratio
self.pad_with_fixed_color = pad_with_fixed_color
self.pad_value = pad_value
def resize_img(self, img, keep_ratio=True):
h, w, _ = img.shape
if keep_ratio:
t_h = self.target_size if h >= w else int(h * self.target_size / w)
t_w = self.target_size if h <= w else int(w * self.target_size / h)
else:
t_h = t_w = self.target_size
img = cv2.resize(img, (t_w, t_h))
return img, (t_h, t_w)
def square_pad(self, img):
h, w = img.shape[:2]
if h == w:
return img, (0, 0)
pad_size = max(h, w)
if self.pad_with_fixed_color:
expand_img = np.ones((pad_size, pad_size, 3), dtype=np.uint8)
expand_img[:] = self.pad_value
else:
(h_ind, w_ind) = (np.random.randint(0, h * 7 // 8),
np.random.randint(0, w * 7 // 8))
img_cut = img[h_ind:(h_ind + h // 9), w_ind:(w_ind + w // 9)]
expand_img = cv2.resize(img_cut, (pad_size, pad_size))
if h > w:
y0, x0 = 0, (h - w) // 2
else:
y0, x0 = (w - h) // 2, 0
expand_img[y0:y0 + h, x0:x0 + w] = img
offset = (x0, y0)
return expand_img, offset
def square_pad_mask(self, points, offset):
x0, y0 = offset
pad_points = points.copy()
pad_points[::2] = pad_points[::2] + x0
pad_points[1::2] = pad_points[1::2] + y0
return pad_points
def __call__(self, results):
image = results['image']
polygons = results['polys']
h, w = image.shape[:2]
if np.random.random_sample() < self.pad_ratio:
image, out_size = self.resize_img(image, keep_ratio=True)
image, offset = self.square_pad(image)
else:
image, out_size = self.resize_img(image, keep_ratio=False)
offset = (0, 0)
results['image'] = image
try:
polygons[:, :, 0::2] = polygons[:, :, 0::2] * out_size[
1] / w + offset[0]
polygons[:, :, 1::2] = polygons[:, :, 1::2] * out_size[
0] / h + offset[1]
        except Exception:
            pass
results['polys'] = polygons
return results
def __repr__(self):
repr_str = self.__class__.__name__
return repr_str
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/textdet_targets/fcenet_targets.py
"""
import cv2
import numpy as np
from numpy.fft import fft
from numpy.linalg import norm
import sys
class FCENetTargets:
"""Generate the ground truth targets of FCENet: Fourier Contour Embedding
for Arbitrary-Shaped Text Detection.
[https://arxiv.org/abs/2104.10442]
Args:
fourier_degree (int): The maximum Fourier transform degree k.
resample_step (float): The step size for resampling the text center
line (TCL). It's better not to exceed half of the minimum width.
center_region_shrink_ratio (float): The shrink ratio of text center
region.
level_size_divisors (tuple(int)): The downsample ratio on each level.
level_proportion_range (tuple(tuple(int))): The range of text sizes
assigned to each level.
"""
def __init__(self,
fourier_degree=5,
resample_step=4.0,
center_region_shrink_ratio=0.3,
level_size_divisors=(8, 16, 32),
level_proportion_range=((0, 0.25), (0.2, 0.65), (0.55, 1.0)),
orientation_thr=2.0,
**kwargs):
super().__init__()
assert isinstance(level_size_divisors, tuple)
assert isinstance(level_proportion_range, tuple)
assert len(level_size_divisors) == len(level_proportion_range)
self.fourier_degree = fourier_degree
self.resample_step = resample_step
self.center_region_shrink_ratio = center_region_shrink_ratio
self.level_size_divisors = level_size_divisors
self.level_proportion_range = level_proportion_range
self.orientation_thr = orientation_thr
def vector_angle(self, vec1, vec2):
if vec1.ndim > 1:
unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8).reshape((-1, 1))
else:
unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8)
if vec2.ndim > 1:
unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8).reshape((-1, 1))
else:
unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8)
return np.arccos(
np.clip(
np.sum(unit_vec1 * unit_vec2, axis=-1), -1.0, 1.0))
    def vector_slope(self, vec):
        """Absolute slope of a 2-D vector; used by find_head_tail below
        (definition follows the mmocr reference implementation)."""
        assert len(vec) == 2
        return abs(vec[1] / (vec[0] + 1e-8))
def resample_line(self, line, n):
"""Resample n points on a line.
Args:
line (ndarray): The points composing a line.
n (int): The resampled points number.
Returns:
resampled_line (ndarray): The points composing the resampled line.
"""
assert line.ndim == 2
assert line.shape[0] >= 2
assert line.shape[1] == 2
assert isinstance(n, int)
assert n > 0
length_list = [
norm(line[i + 1] - line[i]) for i in range(len(line) - 1)
]
total_length = sum(length_list)
length_cumsum = np.cumsum([0.0] + length_list)
delta_length = total_length / (float(n) + 1e-8)
current_edge_ind = 0
resampled_line = [line[0]]
for i in range(1, n):
current_line_len = i * delta_length
while current_line_len >= length_cumsum[current_edge_ind + 1]:
current_edge_ind += 1
current_edge_end_shift = current_line_len - length_cumsum[
current_edge_ind]
end_shift_ratio = current_edge_end_shift / length_list[
current_edge_ind]
current_point = line[current_edge_ind] + (line[current_edge_ind + 1]
- line[current_edge_ind]
) * end_shift_ratio
resampled_line.append(current_point)
resampled_line.append(line[-1])
resampled_line = np.array(resampled_line)
return resampled_line
def reorder_poly_edge(self, points):
"""Get the respective points composing head edge, tail edge, top
sideline and bottom sideline.
Args:
points (ndarray): The points composing a text polygon.
Returns:
head_edge (ndarray): The two points composing the head edge of text
polygon.
tail_edge (ndarray): The two points composing the tail edge of text
polygon.
top_sideline (ndarray): The points composing top curved sideline of
text polygon.
bot_sideline (ndarray): The points composing bottom curved sideline
of text polygon.
"""
assert points.ndim == 2
assert points.shape[0] >= 4
assert points.shape[1] == 2
head_inds, tail_inds = self.find_head_tail(points, self.orientation_thr)
head_edge, tail_edge = points[head_inds], points[tail_inds]
pad_points = np.vstack([points, points])
if tail_inds[1] < 1:
tail_inds[1] = len(points)
sideline1 = pad_points[head_inds[1]:tail_inds[1]]
sideline2 = pad_points[tail_inds[1]:(head_inds[1] + len(points))]
sideline_mean_shift = np.mean(
sideline1, axis=0) - np.mean(
sideline2, axis=0)
if sideline_mean_shift[1] > 0:
top_sideline, bot_sideline = sideline2, sideline1
else:
top_sideline, bot_sideline = sideline1, sideline2
return head_edge, tail_edge, top_sideline, bot_sideline
def find_head_tail(self, points, orientation_thr):
"""Find the head edge and tail edge of a text polygon.
Args:
points (ndarray): The points composing a text polygon.
orientation_thr (float): The threshold for distinguishing between
head edge and tail edge among the horizontal and vertical edges
of a quadrangle.
Returns:
head_inds (list): The indexes of two points composing head edge.
tail_inds (list): The indexes of two points composing tail edge.
"""
assert points.ndim == 2
assert points.shape[0] >= 4
assert points.shape[1] == 2
assert isinstance(orientation_thr, float)
if len(points) > 4:
pad_points = np.vstack([points, points[0]])
edge_vec = pad_points[1:] - pad_points[:-1]
theta_sum = []
adjacent_vec_theta = []
for i, edge_vec1 in enumerate(edge_vec):
adjacent_ind = [x % len(edge_vec) for x in [i - 1, i + 1]]
adjacent_edge_vec = edge_vec[adjacent_ind]
temp_theta_sum = np.sum(
self.vector_angle(edge_vec1, adjacent_edge_vec))
temp_adjacent_theta = self.vector_angle(adjacent_edge_vec[0],
adjacent_edge_vec[1])
theta_sum.append(temp_theta_sum)
adjacent_vec_theta.append(temp_adjacent_theta)
theta_sum_score = np.array(theta_sum) / np.pi
adjacent_theta_score = np.array(adjacent_vec_theta) / np.pi
poly_center = np.mean(points, axis=0)
edge_dist = np.maximum(
norm(
pad_points[1:] - poly_center, axis=-1),
norm(
pad_points[:-1] - poly_center, axis=-1))
dist_score = edge_dist / np.max(edge_dist)
position_score = np.zeros(len(edge_vec))
score = 0.5 * theta_sum_score + 0.15 * adjacent_theta_score
score += 0.35 * dist_score
if len(points) % 2 == 0:
position_score[(len(score) // 2 - 1)] += 1
position_score[-1] += 1
score += 0.1 * position_score
pad_score = np.concatenate([score, score])
score_matrix = np.zeros((len(score), len(score) - 3))
x = np.arange(len(score) - 3) / float(len(score) - 4)
gaussian = 1. / (np.sqrt(2. * np.pi) * 0.5) * np.exp(-np.power(
(x - 0.5) / 0.5, 2.) / 2)
gaussian = gaussian / np.max(gaussian)
for i in range(len(score)):
score_matrix[i, :] = score[i] + pad_score[(i + 2):(i + len(
score) - 1)] * gaussian * 0.3
head_start, tail_increment = np.unravel_index(score_matrix.argmax(),
score_matrix.shape)
tail_start = (head_start + tail_increment + 2) % len(points)
head_end = (head_start + 1) % len(points)
tail_end = (tail_start + 1) % len(points)
if head_end > tail_end:
head_start, tail_start = tail_start, head_start
head_end, tail_end = tail_end, head_end
head_inds = [head_start, head_end]
tail_inds = [tail_start, tail_end]
else:
if self.vector_slope(points[1] - points[0]) + self.vector_slope(
points[3] - points[2]) < self.vector_slope(points[
2] - points[1]) + self.vector_slope(points[0] - points[
3]):
horizontal_edge_inds = [[0, 1], [2, 3]]
vertical_edge_inds = [[3, 0], [1, 2]]
else:
horizontal_edge_inds = [[3, 0], [1, 2]]
vertical_edge_inds = [[0, 1], [2, 3]]
vertical_len_sum = norm(points[vertical_edge_inds[0][0]] - points[
vertical_edge_inds[0][1]]) + norm(points[vertical_edge_inds[1][
0]] - points[vertical_edge_inds[1][1]])
horizontal_len_sum = norm(points[horizontal_edge_inds[0][
0]] - points[horizontal_edge_inds[0][1]]) + norm(points[
horizontal_edge_inds[1][0]] - points[horizontal_edge_inds[1]
[1]])
if vertical_len_sum > horizontal_len_sum * orientation_thr:
head_inds = horizontal_edge_inds[0]
tail_inds = horizontal_edge_inds[1]
else:
head_inds = vertical_edge_inds[0]
tail_inds = vertical_edge_inds[1]
return head_inds, tail_inds
def resample_sidelines(self, sideline1, sideline2, resample_step):
"""Resample two sidelines to be of the same points number according to
step size.
Args:
sideline1 (ndarray): The points composing a sideline of a text
polygon.
sideline2 (ndarray): The points composing another sideline of a
text polygon.
resample_step (float): The resampled step size.
Returns:
resampled_line1 (ndarray): The resampled line 1.
resampled_line2 (ndarray): The resampled line 2.
"""
assert sideline1.ndim == sideline2.ndim == 2
assert sideline1.shape[1] == sideline2.shape[1] == 2
assert sideline1.shape[0] >= 2
assert sideline2.shape[0] >= 2
assert isinstance(resample_step, float)
length1 = sum([
norm(sideline1[i + 1] - sideline1[i])
for i in range(len(sideline1) - 1)
])
length2 = sum([
norm(sideline2[i + 1] - sideline2[i])
for i in range(len(sideline2) - 1)
])
total_length = (length1 + length2) / 2
resample_point_num = max(int(float(total_length) / resample_step), 1)
resampled_line1 = self.resample_line(sideline1, resample_point_num)
resampled_line2 = self.resample_line(sideline2, resample_point_num)
return resampled_line1, resampled_line2
def generate_center_region_mask(self, img_size, text_polys):
"""Generate text center region mask.
Args:
img_size (tuple): The image size of (height, width).
text_polys (list[list[ndarray]]): The list of text polygons.
Returns:
center_region_mask (ndarray): The text center region mask.
"""
assert isinstance(img_size, tuple)
# assert check_argument.is_2dlist(text_polys)
h, w = img_size
center_region_mask = np.zeros((h, w), np.uint8)
center_region_boxes = []
for poly in text_polys:
# assert len(poly) == 1
polygon_points = poly.reshape(-1, 2)
_, _, top_line, bot_line = self.reorder_poly_edge(polygon_points)
resampled_top_line, resampled_bot_line = self.resample_sidelines(
top_line, bot_line, self.resample_step)
resampled_bot_line = resampled_bot_line[::-1]
center_line = (resampled_top_line + resampled_bot_line) / 2
line_head_shrink_len = norm(resampled_top_line[0] -
resampled_bot_line[0]) / 4.0
line_tail_shrink_len = norm(resampled_top_line[-1] -
resampled_bot_line[-1]) / 4.0
head_shrink_num = int(line_head_shrink_len // self.resample_step)
tail_shrink_num = int(line_tail_shrink_len // self.resample_step)
if len(center_line) > head_shrink_num + tail_shrink_num + 2:
center_line = center_line[head_shrink_num:len(center_line) -
tail_shrink_num]
resampled_top_line = resampled_top_line[head_shrink_num:len(
resampled_top_line) - tail_shrink_num]
resampled_bot_line = resampled_bot_line[head_shrink_num:len(
resampled_bot_line) - tail_shrink_num]
for i in range(0, len(center_line) - 1):
tl = center_line[i] + (resampled_top_line[i] - center_line[i]
) * self.center_region_shrink_ratio
tr = center_line[i + 1] + (resampled_top_line[i + 1] -
center_line[i + 1]
) * self.center_region_shrink_ratio
br = center_line[i + 1] + (resampled_bot_line[i + 1] -
center_line[i + 1]
) * self.center_region_shrink_ratio
bl = center_line[i] + (resampled_bot_line[i] - center_line[i]
) * self.center_region_shrink_ratio
current_center_box = np.vstack([tl, tr, br,
bl]).astype(np.int32)
center_region_boxes.append(current_center_box)
cv2.fillPoly(center_region_mask, center_region_boxes, 1)
return center_region_mask
def resample_polygon(self, polygon, n=400):
"""Resample one polygon with n points on its boundary.
Args:
polygon (list[float]): The input polygon.
n (int): The number of resampled points.
Returns:
resampled_polygon (list[float]): The resampled polygon.
"""
length = []
for i in range(len(polygon)):
p1 = polygon[i]
if i == len(polygon) - 1:
p2 = polygon[0]
else:
p2 = polygon[i + 1]
length.append(((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)**0.5)
total_length = sum(length)
n_on_each_line = (np.array(length) / (total_length + 1e-8)) * n
n_on_each_line = n_on_each_line.astype(np.int32)
new_polygon = []
for i in range(len(polygon)):
num = n_on_each_line[i]
p1 = polygon[i]
if i == len(polygon) - 1:
p2 = polygon[0]
else:
p2 = polygon[i + 1]
if num == 0:
continue
dxdy = (p2 - p1) / num
for j in range(num):
point = p1 + dxdy * j
new_polygon.append(point)
return np.array(new_polygon)
    def normalize_polygon(self, polygon):
        """Normalize one polygon so that its start point is at the rightmost.
        Args:
            polygon (list[float]): The origin polygon.
        Returns:
            new_polygon (list[float]): The polygon with start point at the right.
        """
temp_polygon = polygon - polygon.mean(axis=0)
x = np.abs(temp_polygon[:, 0])
y = temp_polygon[:, 1]
index_x = np.argsort(x)
index_y = np.argmin(y[index_x[:8]])
index = index_x[index_y]
new_polygon = np.concatenate([polygon[index:], polygon[:index]])
return new_polygon
def poly2fourier(self, polygon, fourier_degree):
"""Perform Fourier transformation to generate Fourier coefficients ck
from polygon.
Args:
polygon (ndarray): An input polygon.
fourier_degree (int): The maximum Fourier degree K.
Returns:
c (ndarray(complex)): Fourier coefficients.
"""
points = polygon[:, 0] + polygon[:, 1] * 1j
c_fft = fft(points) / len(points)
c = np.hstack((c_fft[-fourier_degree:], c_fft[:fourier_degree + 1]))
return c
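    # Illustrative note (not in the original): keeping c_{-k}, ..., c_{k}
    # means the contour can be approximately rebuilt as
    #   z_i = sum_k c_k * exp(2j * pi * k * i / N),  x_i = Re(z_i), y_i = Im(z_i)
    # and dropping the higher frequencies only smooths fine contour detail,
    # which is why a small fourier_degree (default 5) is enough for text shapes.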
    def clockwise(self, c, fourier_degree):
        """Make sure the polygon reconstructed from Fourier coefficients c is
        in the clockwise direction.
        Args:
            c (ndarray(complex)): The Fourier coefficients.
            fourier_degree (int): The maximum Fourier degree K.
        Returns:
            c (ndarray(complex)): The Fourier coefficients in clockwise order.
        """
if np.abs(c[fourier_degree + 1]) > np.abs(c[fourier_degree - 1]):
return c
elif np.abs(c[fourier_degree + 1]) < np.abs(c[fourier_degree - 1]):
return c[::-1]
else:
if np.abs(c[fourier_degree + 2]) > np.abs(c[fourier_degree - 2]):
return c
else:
return c[::-1]
def cal_fourier_signature(self, polygon, fourier_degree):
"""Calculate Fourier signature from input polygon.
Args:
polygon (ndarray): The input polygon.
fourier_degree (int): The maximum Fourier degree K.
Returns:
            fourier_signature (ndarray): An array shaped (2k+1, 2) containing
                the real part and imaginary part of 2k+1 Fourier coefficients.
"""
resampled_polygon = self.resample_polygon(polygon)
resampled_polygon = self.normalize_polygon(resampled_polygon)
fourier_coeff = self.poly2fourier(resampled_polygon, fourier_degree)
fourier_coeff = self.clockwise(fourier_coeff, fourier_degree)
real_part = np.real(fourier_coeff).reshape((-1, 1))
image_part = np.imag(fourier_coeff).reshape((-1, 1))
fourier_signature = np.hstack([real_part, image_part])
return fourier_signature
def generate_fourier_maps(self, img_size, text_polys):
"""Generate Fourier coefficient maps.
Args:
img_size (tuple): The image size of (height, width).
text_polys (list[list[ndarray]]): The list of text polygons.
Returns:
            fourier_real_map (ndarray): The Fourier coefficient real part maps.
            fourier_image_map (ndarray): The Fourier coefficient imaginary part
                maps.
"""
assert isinstance(img_size, tuple)
h, w = img_size
k = self.fourier_degree
real_map = np.zeros((k * 2 + 1, h, w), dtype=np.float32)
imag_map = np.zeros((k * 2 + 1, h, w), dtype=np.float32)
for poly in text_polys:
mask = np.zeros((h, w), dtype=np.uint8)
polygon = np.array(poly).reshape((1, -1, 2))
cv2.fillPoly(mask, polygon.astype(np.int32), 1)
fourier_coeff = self.cal_fourier_signature(polygon[0], k)
for i in range(-k, k + 1):
if i != 0:
real_map[i + k, :, :] = mask * fourier_coeff[i + k, 0] + (
1 - mask) * real_map[i + k, :, :]
imag_map[i + k, :, :] = mask * fourier_coeff[i + k, 1] + (
1 - mask) * imag_map[i + k, :, :]
else:
yx = np.argwhere(mask > 0.5)
k_ind = np.ones((len(yx)), dtype=np.int64) * k
y, x = yx[:, 0], yx[:, 1]
real_map[k_ind, y, x] = fourier_coeff[k, 0] - x
imag_map[k_ind, y, x] = fourier_coeff[k, 1] - y
return real_map, imag_map
def generate_text_region_mask(self, img_size, text_polys):
"""Generate text center region mask and geometry attribute maps.
Args:
img_size (tuple): The image size (height, width).
text_polys (list[list[ndarray]]): The list of text polygons.
Returns:
text_region_mask (ndarray): The text region mask.
"""
assert isinstance(img_size, tuple)
h, w = img_size
text_region_mask = np.zeros((h, w), dtype=np.uint8)
for poly in text_polys:
polygon = np.array(poly, dtype=np.int32).reshape((1, -1, 2))
cv2.fillPoly(text_region_mask, polygon, 1)
return text_region_mask
def generate_effective_mask(self, mask_size: tuple, polygons_ignore):
"""Generate effective mask by setting the ineffective regions to 0 and
effective regions to 1.
Args:
mask_size (tuple): The mask size.
polygons_ignore (list[[ndarray]]: The list of ignored text
polygons.
Returns:
mask (ndarray): The effective mask of (height, width).
"""
mask = np.ones(mask_size, dtype=np.uint8)
for poly in polygons_ignore:
instance = poly.reshape(-1, 2).astype(np.int32).reshape(1, -1, 2)
cv2.fillPoly(mask, instance, 0)
return mask
def generate_level_targets(self, img_size, text_polys, ignore_polys):
"""Generate ground truth target on each level.
Args:
img_size (list[int]): Shape of input image.
text_polys (list[list[ndarray]]): A list of ground truth polygons.
ignore_polys (list[list[ndarray]]): A list of ignored polygons.
Returns:
level_maps (list(ndarray)): A list of ground target on each level.
"""
h, w = img_size
lv_size_divs = self.level_size_divisors
lv_proportion_range = self.level_proportion_range
lv_text_polys = [[] for i in range(len(lv_size_divs))]
lv_ignore_polys = [[] for i in range(len(lv_size_divs))]
level_maps = []
for poly in text_polys:
            polygon = np.array(poly, dtype=np.int32).reshape((1, -1, 2))
_, _, box_w, box_h = cv2.boundingRect(polygon)
proportion = max(box_h, box_w) / (h + 1e-8)
for ind, proportion_range in enumerate(lv_proportion_range):
if proportion_range[0] < proportion < proportion_range[1]:
lv_text_polys[ind].append(poly / lv_size_divs[ind])
for ignore_poly in ignore_polys:
            polygon = np.array(ignore_poly, dtype=np.int32).reshape((1, -1, 2))
_, _, box_w, box_h = cv2.boundingRect(polygon)
proportion = max(box_h, box_w) / (h + 1e-8)
for ind, proportion_range in enumerate(lv_proportion_range):
if proportion_range[0] < proportion < proportion_range[1]:
lv_ignore_polys[ind].append(ignore_poly / lv_size_divs[ind])
for ind, size_divisor in enumerate(lv_size_divs):
current_level_maps = []
level_img_size = (h // size_divisor, w // size_divisor)
text_region = self.generate_text_region_mask(
level_img_size, lv_text_polys[ind])[None]
current_level_maps.append(text_region)
center_region = self.generate_center_region_mask(
level_img_size, lv_text_polys[ind])[None]
current_level_maps.append(center_region)
effective_mask = self.generate_effective_mask(
level_img_size, lv_ignore_polys[ind])[None]
current_level_maps.append(effective_mask)
fourier_real_map, fourier_image_maps = self.generate_fourier_maps(
level_img_size, lv_text_polys[ind])
current_level_maps.append(fourier_real_map)
current_level_maps.append(fourier_image_maps)
level_maps.append(np.concatenate(current_level_maps))
return level_maps
def generate_targets(self, results):
"""Generate the ground truth targets for FCENet.
Args:
results (dict): The input result dictionary.
Returns:
results (dict): The output result dictionary.
"""
assert isinstance(results, dict)
image = results['image']
polygons = results['polys']
ignore_tags = results['ignore_tags']
h, w, _ = image.shape
polygon_masks = []
polygon_masks_ignore = []
for tag, polygon in zip(ignore_tags, polygons):
if tag is True:
polygon_masks_ignore.append(polygon)
else:
polygon_masks.append(polygon)
level_maps = self.generate_level_targets((h, w), polygon_masks,
polygon_masks_ignore)
mapping = {
'p3_maps': level_maps[0],
'p4_maps': level_maps[1],
'p5_maps': level_maps[2]
}
for key, value in mapping.items():
results[key] = value
return results
def __call__(self, results):
results = self.generate_targets(results)
return results
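# Usage sketch (illustrative, with assumed dummy inputs): every generated
# p{3,4,5}_maps array has 4 * fourier_degree + 5 channels -- text region (1),
# center region (1), effective mask (1), plus the real and imaginary Fourier
# maps (2k+1 each) -- which is exactly the shape FCELoss checks against:
#
#   targets = FCENetTargets(fourier_degree=5)
#   results = {'image': np.zeros((800, 800, 3), np.uint8),
#              'polys': [np.random.rand(1, 8) * 800],  # one 4-point polygon
#              'ignore_tags': [False]}
#   results = targets(results)
#   # results['p3_maps'].shape[0] == 4 * 5 + 5 == 25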
@@ -785,6 +785,53 @@ class SARLabelEncode(BaseRecLabelEncode):
        return [self.padding_idx]
class PRENLabelEncode(BaseRecLabelEncode):
def __init__(self,
max_text_length,
character_dict_path,
use_space_char=False,
**kwargs):
super(PRENLabelEncode, self).__init__(
max_text_length, character_dict_path, use_space_char)
def add_special_char(self, dict_character):
padding_str = '<PAD>' # 0
end_str = '<EOS>' # 1
unknown_str = '<UNK>' # 2
dict_character = [padding_str, end_str, unknown_str] + dict_character
self.padding_idx = 0
self.end_idx = 1
self.unknown_idx = 2
return dict_character
def encode(self, text):
if len(text) == 0 or len(text) >= self.max_text_len:
return None
if self.lower:
text = text.lower()
text_list = []
for char in text:
if char not in self.dict:
text_list.append(self.unknown_idx)
else:
text_list.append(self.dict[char])
text_list.append(self.end_idx)
if len(text_list) < self.max_text_len:
text_list += [self.padding_idx] * (
self.max_text_len - len(text_list))
return text_list
def __call__(self, data):
text = data['label']
encoded_text = self.encode(text)
if encoded_text is None:
return None
data['label'] = np.array(encoded_text)
return data
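    # Encoding sketch (hypothetical dict {'a': 3, 'b': 4, 'c': 5}, since ids
    # 0/1/2 are reserved for <PAD>/<EOS>/<UNK>): with max_text_len = 8, the
    # label 'abz' becomes [3, 4, 2, 1, 0, 0, 0, 0] -- the unknown 'z' maps to
    # <UNK>, <EOS> is appended, and the tail is padded with <PAD>.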
class VQATokenLabelEncode(object):
    """
    Label encode for NLP VQA methods
...
@@ -23,14 +23,20 @@ import sys
import six
import cv2
import numpy as np
import math
class DecodeImage(object):
    """ decode image """
    def __init__(self,
                 img_mode='RGB',
                 channel_first=False,
                 ignore_orientation=False,
                 **kwargs):
        self.img_mode = img_mode
        self.channel_first = channel_first
self.ignore_orientation = ignore_orientation
    def __call__(self, data):
        img = data['image']
@@ -41,7 +47,11 @@ class DecodeImage(object):
        assert type(img) is bytes and len(
            img) > 0, "invalid input 'img' in DecodeImage"
        img = np.frombuffer(img, dtype='uint8')
        if self.ignore_orientation:
            img = cv2.imdecode(img, cv2.IMREAD_IGNORE_ORIENTATION |
                               cv2.IMREAD_COLOR)
        else:
            img = cv2.imdecode(img, 1)
        if img is None:
            return None
        if self.img_mode == 'GRAY':
@@ -156,6 +166,44 @@ class KeepKeys(object):
        return data_list
class Pad(object):
def __init__(self, size=None, size_div=32, **kwargs):
        if size is not None and not isinstance(size, (int, list, tuple)):
            raise TypeError("Type of size is invalid. Now is {}".format(
                type(size)))
if isinstance(size, int):
size = [size, size]
self.size = size
self.size_div = size_div
def __call__(self, data):
img = data['image']
img_h, img_w = img.shape[0], img.shape[1]
if self.size:
resize_h2, resize_w2 = self.size
assert (
img_h < resize_h2 and img_w < resize_w2
), '(h, w) of target size should be greater than (img_h, img_w)'
else:
resize_h2 = max(
int(math.ceil(img.shape[0] / self.size_div) * self.size_div),
self.size_div)
resize_w2 = max(
int(math.ceil(img.shape[1] / self.size_div) * self.size_div),
self.size_div)
img = cv2.copyMakeBorder(
img,
0,
resize_h2 - img_h,
0,
resize_w2 - img_w,
cv2.BORDER_CONSTANT,
value=0)
data['image'] = img
return data
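# Padding arithmetic sketch (illustrative): with size=None and size_div=32, a
# 100 x 150 image is padded with zeros on the bottom and right up to the next
# multiples of 32, i.e. ceil(100 / 32) * 32 = 128 by ceil(150 / 32) * 32 = 160.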
class Resize(object):
    def __init__(self, size=(640, 640), **kwargs):
        self.size = size
...
@@ -141,6 +141,25 @@ class SARRecResizeImg(object):
        return data
class PRENResizeImg(object):
def __init__(self, image_shape, **kwargs):
"""
Accroding to original paper's realization, it's a hard resize method here.
So maybe you should optimize it to fit for your task better.
"""
self.dst_h, self.dst_w = image_shape
def __call__(self, data):
img = data['image']
resized_img = cv2.resize(
img, (self.dst_w, self.dst_h), interpolation=cv2.INTER_LINEAR)
resized_img = resized_img.transpose((2, 0, 1)) / 255
resized_img -= 0.5
resized_img /= 0.5
data['image'] = resized_img.astype(np.float32)
return data
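# Note: the normalization above maps pixel values from [0, 255] to [-1, 1]
# via (x / 255 - 0.5) / 0.5 and transposes HWC to CHW before handing the
# image to the network.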
def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25):
    imgC, imgH, imgW_min, imgW_max = image_shape
    h = img.shape[0]
...
@@ -13,6 +13,7 @@
# limitations under the License.
import numpy as np
import os
import json
import random
import traceback
from paddle.io import Dataset
...
@@ -24,6 +24,7 @@ from .det_db_loss import DBLoss
from .det_east_loss import EASTLoss
from .det_sast_loss import SASTLoss
from .det_pse_loss import PSELoss
from .det_fce_loss import FCELoss
# rec loss
from .rec_ctc_loss import CTCLoss
@@ -32,6 +33,7 @@ from .rec_srn_loss import SRNLoss
from .rec_nrtr_loss import NRTRLoss
from .rec_sar_loss import SARLoss
from .rec_aster_loss import AsterLoss
from .rec_pren_loss import PRENLoss
# cls loss
from .cls_loss import ClsLoss
@@ -55,10 +57,10 @@ from .vqa_token_layoutlm_loss import VQASerTokenLayoutLMLoss
def build_loss(config):
    support_dict = [
        'DBLoss', 'PSELoss', 'EASTLoss', 'SASTLoss', 'FCELoss', 'CTCLoss',
        'ClsLoss', 'AttentionLoss', 'SRNLoss', 'PGLoss', 'CombinedLoss',
        'NRTRLoss', 'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss',
        'VQASerTokenLayoutLMLoss', 'LossFromOutput', 'PRENLoss'
    ]
    config = copy.deepcopy(config)
    module_name = config.pop('name')
...
@@ -95,9 +95,15 @@ class DMLLoss(nn.Layer):
            self.act = None
        self.use_log = use_log
        self.jskl_loss = KLJSLoss(mode="js")
def _kldiv(self, x, target):
eps = 1.0e-10
loss = target * (paddle.log(target + eps) - x)
# batch mean loss
loss = paddle.sum(loss) / loss.shape[0]
return loss
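    # _kldiv(x, target) expects x = log(probs) and computes
    # sum(target * (log(target) - x)) / batch_size, i.e. KL(target || probs)
    # averaged over the batch dimension only -- the same reduction as the
    # previous F.kl_div(..., reduction='batchmean') formulation.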
    def forward(self, out1, out2):
        if self.act is not None:
            out1 = self.act(out1)
@@ -106,9 +112,8 @@ class DMLLoss(nn.Layer):
            # for recognition distillation, log is needed for feature map
            log_out1 = paddle.log(out1)
            log_out2 = paddle.log(out2)
            loss = (
                self._kldiv(log_out1, out2) + self._kldiv(log_out2, out1)) / 2.0
        else:
            # for detection distillation log is not needed
            loss = self.jskl_loss(out1, out2)
...
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textdet/losses/fce_loss.py
"""
import numpy as np
from paddle import nn
import paddle
import paddle.nn.functional as F
from functools import partial
def multi_apply(func, *args, **kwargs):
pfunc = partial(func, **kwargs) if kwargs else func
map_results = map(pfunc, *args)
return tuple(map(list, zip(*map_results)))
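# multi_apply maps `func` over the zipped per-level inputs and transposes the
# resulting tuples into per-output lists, e.g. multi_apply(f, [a1, a2], [b1, b2])
# returns ([x1, x2], [y1, y2]) when f(a, b) returns (x, y).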
class FCELoss(nn.Layer):
"""The class for implementing FCENet loss
FCENet(CVPR2021): Fourier Contour Embedding for Arbitrary-shaped
Text Detection
[https://arxiv.org/abs/2104.10442]
Args:
fourier_degree (int) : The maximum Fourier transform degree k.
num_sample (int) : The sampling points number of regression
loss. If it is too small, fcenet tends to be overfitting.
ohem_ratio (float): the negative/positive ratio in OHEM.
"""
def __init__(self, fourier_degree, num_sample, ohem_ratio=3.):
super().__init__()
self.fourier_degree = fourier_degree
self.num_sample = num_sample
self.ohem_ratio = ohem_ratio
def forward(self, preds, labels):
assert isinstance(preds, dict)
preds = preds['levels']
p3_maps, p4_maps, p5_maps = labels[1:]
assert p3_maps[0].shape[0] == 4 * self.fourier_degree + 5,\
'fourier degree not equal in FCEhead and FCEtarget'
# to tensor
gts = [p3_maps, p4_maps, p5_maps]
for idx, maps in enumerate(gts):
gts[idx] = paddle.to_tensor(np.stack(maps))
losses = multi_apply(self.forward_single, preds, gts)
loss_tr = paddle.to_tensor(0.).astype('float32')
loss_tcl = paddle.to_tensor(0.).astype('float32')
loss_reg_x = paddle.to_tensor(0.).astype('float32')
loss_reg_y = paddle.to_tensor(0.).astype('float32')
loss_all = paddle.to_tensor(0.).astype('float32')
for idx, loss in enumerate(losses):
loss_all += sum(loss)
if idx == 0:
loss_tr += sum(loss)
elif idx == 1:
loss_tcl += sum(loss)
elif idx == 2:
loss_reg_x += sum(loss)
else:
loss_reg_y += sum(loss)
results = dict(
loss=loss_all,
loss_text=loss_tr,
loss_center=loss_tcl,
loss_reg_x=loss_reg_x,
loss_reg_y=loss_reg_y, )
return results
def forward_single(self, pred, gt):
cls_pred = paddle.transpose(pred[0], (0, 2, 3, 1))
reg_pred = paddle.transpose(pred[1], (0, 2, 3, 1))
gt = paddle.transpose(gt, (0, 2, 3, 1))
k = 2 * self.fourier_degree + 1
tr_pred = paddle.reshape(cls_pred[:, :, :, :2], (-1, 2))
tcl_pred = paddle.reshape(cls_pred[:, :, :, 2:], (-1, 2))
x_pred = paddle.reshape(reg_pred[:, :, :, 0:k], (-1, k))
y_pred = paddle.reshape(reg_pred[:, :, :, k:2 * k], (-1, k))
tr_mask = gt[:, :, :, :1].reshape([-1])
tcl_mask = gt[:, :, :, 1:2].reshape([-1])
train_mask = gt[:, :, :, 2:3].reshape([-1])
x_map = paddle.reshape(gt[:, :, :, 3:3 + k], (-1, k))
y_map = paddle.reshape(gt[:, :, :, 3 + k:], (-1, k))
tr_train_mask = (train_mask * tr_mask).astype('bool')
tr_train_mask2 = paddle.concat(
[tr_train_mask.unsqueeze(1), tr_train_mask.unsqueeze(1)], axis=1)
# tr loss
loss_tr = self.ohem(tr_pred, tr_mask, train_mask)
# tcl loss
loss_tcl = paddle.to_tensor(0.).astype('float32')
tr_neg_mask = tr_train_mask.logical_not()
tr_neg_mask2 = paddle.concat(
[tr_neg_mask.unsqueeze(1), tr_neg_mask.unsqueeze(1)], axis=1)
if tr_train_mask.sum().item() > 0:
loss_tcl_pos = F.cross_entropy(
tcl_pred.masked_select(tr_train_mask2).reshape([-1, 2]),
tcl_mask.masked_select(tr_train_mask).astype('int64'))
loss_tcl_neg = F.cross_entropy(
tcl_pred.masked_select(tr_neg_mask2).reshape([-1, 2]),
tcl_mask.masked_select(tr_neg_mask).astype('int64'))
loss_tcl = loss_tcl_pos + 0.5 * loss_tcl_neg
# regression loss
loss_reg_x = paddle.to_tensor(0.).astype('float32')
loss_reg_y = paddle.to_tensor(0.).astype('float32')
if tr_train_mask.sum().item() > 0:
weight = (tr_mask.masked_select(tr_train_mask.astype('bool'))
.astype('float32') + tcl_mask.masked_select(
tr_train_mask.astype('bool')).astype('float32')) / 2
weight = weight.reshape([-1, 1])
ft_x, ft_y = self.fourier2poly(x_map, y_map)
ft_x_pre, ft_y_pre = self.fourier2poly(x_pred, y_pred)
dim = ft_x.shape[1]
tr_train_mask3 = paddle.concat(
[tr_train_mask.unsqueeze(1) for i in range(dim)], axis=1)
loss_reg_x = paddle.mean(weight * F.smooth_l1_loss(
ft_x_pre.masked_select(tr_train_mask3).reshape([-1, dim]),
ft_x.masked_select(tr_train_mask3).reshape([-1, dim]),
reduction='none'))
loss_reg_y = paddle.mean(weight * F.smooth_l1_loss(
ft_y_pre.masked_select(tr_train_mask3).reshape([-1, dim]),
ft_y.masked_select(tr_train_mask3).reshape([-1, dim]),
reduction='none'))
return loss_tr, loss_tcl, loss_reg_x, loss_reg_y
def ohem(self, predict, target, train_mask):
pos = (target * train_mask).astype('bool')
neg = ((1 - target) * train_mask).astype('bool')
pos2 = paddle.concat([pos.unsqueeze(1), pos.unsqueeze(1)], axis=1)
neg2 = paddle.concat([neg.unsqueeze(1), neg.unsqueeze(1)], axis=1)
n_pos = pos.astype('float32').sum()
if n_pos.item() > 0:
loss_pos = F.cross_entropy(
predict.masked_select(pos2).reshape([-1, 2]),
target.masked_select(pos).astype('int64'),
reduction='sum')
loss_neg = F.cross_entropy(
predict.masked_select(neg2).reshape([-1, 2]),
target.masked_select(neg).astype('int64'),
reduction='none')
n_neg = min(
int(neg.astype('float32').sum().item()),
int(self.ohem_ratio * n_pos.astype('float32')))
else:
loss_pos = paddle.to_tensor(0.)
loss_neg = F.cross_entropy(
predict.masked_select(neg2).reshape([-1, 2]),
target.masked_select(neg).astype('int64'),
reduction='none')
n_neg = 100
if len(loss_neg) > n_neg:
loss_neg, _ = paddle.topk(loss_neg, n_neg)
return (loss_pos + loss_neg.sum()) / (n_pos + n_neg).astype('float32')
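
    # A minimal sketch of the selection above, with hypothetical toy numbers:
    # given n_pos = 4 positive pixels and ohem_ratio = 3, at most
    # n_neg = min(#negatives, 12) negatives are kept, and paddle.topk retains
    # the ones with the largest per-pixel cross-entropy before the loss is
    # normalized by (n_pos + n_neg).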
def fourier2poly(self, real_maps, imag_maps):
"""Transform Fourier coefficient maps to polygon maps.
        Args:
            real_maps (tensor): A map composed of the real parts of the
                Fourier coefficients, whose shape is (-1, 2k+1)
            imag_maps (tensor): A map composed of the imaginary parts of the
                Fourier coefficients, whose shape is (-1, 2k+1)
        Returns:
            x_maps (tensor): A map composed of the x values of the polygon
                represented by n sample points (xn, yn), whose shape is (-1, n)
            y_maps (tensor): A map composed of the y values of the polygon
                represented by n sample points (xn, yn), whose shape is (-1, n)
"""
k_vect = paddle.arange(
-self.fourier_degree, self.fourier_degree + 1,
dtype='float32').reshape([-1, 1])
i_vect = paddle.arange(
0, self.num_sample, dtype='float32').reshape([1, -1])
transform_matrix = 2 * np.pi / self.num_sample * paddle.matmul(k_vect,
i_vect)
x1 = paddle.einsum('ak, kn-> an', real_maps,
paddle.cos(transform_matrix))
x2 = paddle.einsum('ak, kn-> an', imag_maps,
paddle.sin(transform_matrix))
y1 = paddle.einsum('ak, kn-> an', real_maps,
paddle.sin(transform_matrix))
y2 = paddle.einsum('ak, kn-> an', imag_maps,
paddle.cos(transform_matrix))
x_maps = x1 - x2
y_maps = y1 + y2
return x_maps, y_maps
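
For reference, fourier2poly evaluates the inverse Fourier series that FCENet
uses to turn predicted coefficients back into a closed text contour. Writing
the k-th complex coefficient as c_k with real part Re(c_k) and imaginary part
Im(c_k), K for fourier_degree and n for num_sample, the i-th resampled point
is (a sketch of the math implemented above, not part of the source):

    x_i = \sum_{k=-K}^{K} \Big[ \mathrm{Re}(c_k)\cos\tfrac{2\pi k i}{n}
                              - \mathrm{Im}(c_k)\sin\tfrac{2\pi k i}{n} \Big]

    y_i = \sum_{k=-K}^{K} \Big[ \mathrm{Re}(c_k)\sin\tfrac{2\pi k i}{n}
                              + \mathrm{Im}(c_k)\cos\tfrac{2\pi k i}{n} \Big]

That is, (x_i, y_i) are the real and imaginary parts of
\sum_k c_k e^{j \cdot 2\pi k i / n} (j the imaginary unit), which is exactly
what the two einsum pairs compute.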
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import nn
class PRENLoss(nn.Layer):
def __init__(self, **kwargs):
super(PRENLoss, self).__init__()
# note: 0 is padding idx
self.loss_func = nn.CrossEntropyLoss(reduction='mean', ignore_index=0)
def forward(self, predicts, batch):
loss = self.loss_func(predicts, batch[1].astype('int64'))
return {'loss': loss}
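
As a quick sanity check, PRENLoss can be exercised on random data. The shapes
below are illustrative assumptions (batch of 2, sequence length 10, a 37-class
character set), not values mandated by PREN:

import paddle

loss_fn = PRENLoss()
predicts = paddle.rand([2 * 10, 37])        # flattened logits over characters
labels = paddle.randint(0, 37, [2 * 10])    # flattened target ids, 0 = padding
batch = [None, labels]                      # the loss only reads batch[1]
print(loss_fn(predicts, batch)['loss'])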
...@@ -21,7 +21,7 @@ import copy
__all__ = ["build_metric"]

from .det_metric import DetMetric, DetFCEMetric
from .rec_metric import RecMetric
from .cls_metric import ClsMetric
from .e2e_metric import E2EMetric
...@@ -34,7 +34,7 @@ from .vqa_token_re_metric import VQAReTokenMetric
def build_metric(config):
    support_dict = [
        "DetMetric", "DetFCEMetric", "RecMetric", "ClsMetric", "E2EMetric",
        "DistillationMetric", "TableMetric", 'KIEMetric', 'VQASerTokenMetric',
        'VQAReTokenMetric'
    ]
...
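
With DetFCEMetric registered above, the metric can be selected from a config
dict in the usual PaddleOCR way; a minimal sketch (the 'name' key picks the
class from support_dict, and the remaining keys are passed through as keyword
arguments):

from ppocr.metrics import build_metric

metric = build_metric({'name': 'DetFCEMetric', 'main_indicator': 'hmean'})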
...@@ -16,7 +16,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

__all__ = ['DetMetric', 'DetFCEMetric']

from .eval_det_iou import DetectionIoUEvaluator
...@@ -55,7 +55,6 @@ class DetMetric(object):
        result = self.evaluator.evaluate_image(gt_info_list, det_info_list)
        self.results.append(result)
    def get_metric(self):
        """
        return metrics {
...@@ -71,3 +70,85 @@ class DetMetric(object):
    def reset(self):
        self.results = []  # clear results
class DetFCEMetric(object):
def __init__(self, main_indicator='hmean', **kwargs):
self.evaluator = DetectionIoUEvaluator()
self.main_indicator = main_indicator
self.reset()
def __call__(self, preds, batch, **kwargs):
        '''
        batch: a list produced by the dataloader.
            image: np.ndarray of shape (N, C, H, W).
            ratio_list: np.ndarray of shape (N, 2).
            polygons: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions.
            ignore_tags: np.ndarray of shape (N, K), indicates whether a region is ignorable or not.
        preds: a list of dicts produced by the post process.
            points: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions.
            scores: np.ndarray of shape (N, K), the confidence scores of the regions.
        '''
        gt_polygons_batch = batch[2]
        ignore_tags_batch = batch[3]
        for pred, gt_polygons, ignore_tags in zip(preds, gt_polygons_batch,
                                                  ignore_tags_batch):
            # prepare gt
            gt_info_list = [{
                'points': gt_polygon,
                'text': '',
                'ignore': ignore_tag
            } for gt_polygon, ignore_tag in zip(gt_polygons, ignore_tags)]
            # prepare det
            det_info_list = [{
                'points': det_polygon,
                'text': '',
                'score': score
            } for det_polygon, score in zip(pred['points'], pred['scores'])]
            for score_thr in self.results.keys():
                det_info_list_thr = [
                    det_info for det_info in det_info_list
                    if det_info['score'] >= score_thr
                ]
                result = self.evaluator.evaluate_image(gt_info_list,
                                                       det_info_list_thr)
                self.results[score_thr].append(result)
    def get_metric(self):
        """
        return metrics {'hmean': 0,
            'thr 0.3': 'precision: 0 recall: 0 hmean: 0',
            'thr 0.4': 'precision: 0 recall: 0 hmean: 0',
            'thr 0.5': 'precision: 0 recall: 0 hmean: 0',
            'thr 0.6': 'precision: 0 recall: 0 hmean: 0',
            'thr 0.7': 'precision: 0 recall: 0 hmean: 0',
            'thr 0.8': 'precision: 0 recall: 0 hmean: 0',
            'thr 0.9': 'precision: 0 recall: 0 hmean: 0',
            }
        """
        metrics = {}
        hmean = 0
        for score_thr in self.results.keys():
            metric = self.evaluator.combine_results(self.results[score_thr])
            # for key, value in metric.items():
            #     metrics['{}_{}'.format(key, score_thr)] = value
            metric_str = 'precision:{:.5f} recall:{:.5f} hmean:{:.5f}'.format(
                metric['precision'], metric['recall'], metric['hmean'])
            metrics['thr {}'.format(score_thr)] = metric_str
            hmean = max(hmean, metric['hmean'])
        metrics['hmean'] = hmean
        self.reset()
        return metrics
def reset(self):
self.results = {
0.3: [],
0.4: [],
0.5: [],
0.6: [],
0.7: [],
0.8: [],
0.9: []
} # clear results
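
A minimal sketch of how this metric is driven (the polygon and score values
are made-up placeholders; in a real run preds comes from the FCE post process
and batch from the evaluation dataloader):

import numpy as np

metric = DetFCEMetric()
batch = [None, None,
         np.zeros([1, 1, 4, 2], dtype='float32'),  # gt polygons, (N, K, 4, 2)
         np.array([[False]])]                      # ignore tags, (N, K)
preds = [{'points': [np.zeros([4, 2], dtype='float32')], 'scores': [0.95]}]
metric(preds, batch)
print(metric.get_metric())   # one precision/recall/hmean string per threshold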
...@@ -30,9 +30,10 @@ def build_backbone(config, model_type):
        from .rec_resnet_31 import ResNet31
        from .rec_resnet_aster import ResNet_ASTER
        from .rec_micronet import MicroNet
        from .rec_efficientb3_pren import EfficientNetb3_PREN
        support_dict = [
            'MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB',
            "ResNet31", "ResNet_ASTER", 'MicroNet', 'EfficientNetb3_PREN'
        ]
    elif model_type == "e2e":
        from .e2e_resnet_vd_pg import ResNet
...
...@@ -21,9 +21,82 @@ from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.vision.ops import DeformConv2D
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Normal, Constant, XavierUniform

__all__ = ["ResNet"]
class DeformableConvV2(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
weight_attr=None,
bias_attr=None,
lr_scale=1,
regularizer=None,
skip_quant=False,
dcn_bias_regularizer=L2Decay(0.),
dcn_bias_lr_scale=2.):
super(DeformableConvV2, self).__init__()
self.offset_channel = 2 * kernel_size**2 * groups
self.mask_channel = kernel_size**2 * groups
if bias_attr:
# in FCOS-DCN head, specifically need learning_rate and regularizer
dcn_bias_attr = ParamAttr(
initializer=Constant(value=0),
regularizer=dcn_bias_regularizer,
learning_rate=dcn_bias_lr_scale)
else:
# in ResNet backbone, do not need bias
dcn_bias_attr = False
self.conv_dcn = DeformConv2D(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2 * dilation,
dilation=dilation,
deformable_groups=groups,
weight_attr=weight_attr,
bias_attr=dcn_bias_attr)
if lr_scale == 1 and regularizer is None:
offset_bias_attr = ParamAttr(initializer=Constant(0.))
else:
offset_bias_attr = ParamAttr(
initializer=Constant(0.),
learning_rate=lr_scale,
regularizer=regularizer)
self.conv_offset = nn.Conv2D(
in_channels,
groups * 3 * kernel_size**2,
kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2,
weight_attr=ParamAttr(initializer=Constant(0.0)),
bias_attr=offset_bias_attr)
if skip_quant:
self.conv_offset.skip_quant = True
def forward(self, x):
offset_mask = self.conv_offset(x)
offset, mask = paddle.split(
offset_mask,
num_or_sections=[self.offset_channel, self.mask_channel],
axis=1)
mask = F.sigmoid(mask)
y = self.conv_dcn(x, offset, mask=mask)
return y
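
# Channel bookkeeping for the split above, as a hypothetical example: with
# kernel_size=3 and groups=1, conv_offset emits 3 * 3**2 = 27 channels, which
# paddle.split divides into 18 offset channels (a (dx, dy) pair for each of
# the 9 sampling locations) and 9 mask channels that gate each location after
# the sigmoid.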
class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
...@@ -32,20 +105,31 @@ class ConvBNLayer(nn.Layer):
                 stride=1,
                 groups=1,
                 is_vd_mode=False,
                 act=None,
                 is_dcn=False):
        super(ConvBNLayer, self).__init__()
        self.is_vd_mode = is_vd_mode
        self._pool2d_avg = nn.AvgPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        if not is_dcn:
            self._conv = nn.Conv2D(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=(kernel_size - 1) // 2,
                groups=groups,
                bias_attr=False)
        else:
            self._conv = DeformableConvV2(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=(kernel_size - 1) // 2,
                groups=2,  # groups,
                bias_attr=False)
        self._batch_norm = nn.BatchNorm(out_channels, act=act)
    def forward(self, inputs):
...@@ -57,12 +141,14 @@ class ConvBNLayer(nn.Layer):
class BottleneckBlock(nn.Layer):
    def __init__(
            self,
            in_channels,
            out_channels,
            stride,
            shortcut=True,
            if_first=False,
            is_dcn=False, ):
        super(BottleneckBlock, self).__init__()

        self.conv0 = ConvBNLayer(
...@@ -75,7 +161,8 @@ class BottleneckBlock(nn.Layer):
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            is_dcn=is_dcn)
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels * 4,
...@@ -152,7 +239,12 @@ class BasicBlock(nn.Layer):
class ResNet(nn.Layer):
    def __init__(self,
                 in_channels=3,
                 layers=50,
                 dcn_stage=None,
                 out_indices=None,
                 **kwargs):
        super(ResNet, self).__init__()

        self.layers = layers
...@@ -175,6 +267,13 @@ class ResNet(nn.Layer):
            1024] if layers >= 50 else [64, 64, 128, 256]
        num_filters = [64, 128, 256, 512]

        self.dcn_stage = dcn_stage if dcn_stage is not None else [
            False, False, False, False
        ]
        self.out_indices = out_indices if out_indices is not None else [
            0, 1, 2, 3
        ]

        self.conv1_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=32,
...@@ -201,6 +300,7 @@ class ResNet(nn.Layer):
            for block in range(len(depth)):
                block_list = []
                shortcut = False
                is_dcn = self.dcn_stage[block]
                for i in range(depth[block]):
                    bottleneck_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
...@@ -210,15 +310,18 @@ class ResNet(nn.Layer):
                            out_channels=num_filters[block],
                            stride=2 if i == 0 and block != 0 else 1,
                            shortcut=shortcut,
                            if_first=block == i == 0,
                            is_dcn=is_dcn))
                    shortcut = True
                    block_list.append(bottleneck_block)
                if block in self.out_indices:
                    self.out_channels.append(num_filters[block] * 4)
                self.stages.append(nn.Sequential(*block_list))
        else:
            for block in range(len(depth)):
                block_list = []
                shortcut = False
                # is_dcn = self.dcn_stage[block]
                for i in range(depth[block]):
                    basic_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
...@@ -231,7 +334,8 @@ class ResNet(nn.Layer):
                            if_first=block == i == 0))
                    shortcut = True
                    block_list.append(basic_block)
                if block in self.out_indices:
                    self.out_channels.append(num_filters[block])
                self.stages.append(nn.Sequential(*block_list))

    def forward(self, inputs):
...@@ -240,7 +344,8 @@ class ResNet(nn.Layer):
        y = self.conv1_3(y)
        y = self.pool2d_max(y)
        out = []
        for i, block in enumerate(self.stages):
            y = block(y)
            if i in self.out_indices:
                out.append(y)
        return out
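
Putting the new arguments together, a DCN-enabled backbone for FCENet can be
built roughly as follows; the stage/index choice below is an illustrative
assumption (it mirrors a typical det_r50_vd_dcn setup), not the only valid
configuration:

import paddle

# ResNet50-vd with deformable conv in the last three stages, returning C3-C5
backbone = ResNet(in_channels=3, layers=50,
                  dcn_stage=[False, True, True, True],
                  out_indices=[1, 2, 3])
feats = backbone(paddle.rand([1, 3, 640, 640]))
print([f.shape for f in feats])   # three feature maps at strides 8/16/32
print(backbone.out_channels)      # [512, 1024, 2048]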