add fcenet

9f62b610 · zhiminzhang0830 · 5876f3f4 · 9f62b610 · 9f62b610 · 9f62b610
Commit 9f62b610 authored Jan 27, 2022 by zhiminzhang0830
17 changed files
--- a/configs/det/det_r50_fce_ctw.yml
+++ b/configs/det/det_r50_fce_ctw.yml
+Global:
+  use_gpu: true
+  epoch_num: 1500
+  log_smooth_window: 20
+  print_batch_step: 20
+  save_model_dir: ./output/fce_r50_ctw/
+  save_epoch_step: 100
+  # evaluation is run every 835 iterations
+  eval_batch_step: [0, 835]
+  cal_metric_during_train: False
+  pretrained_model: ../pretrain_models/ResNet50_vd_ssld_pretrained 
+  checkpoints: #output/fce_r50_ctw/latest
+  save_inference_dir: 
+  use_visualdl: False
+  infer_img: doc/imgs_en/img_10.jpg
+  save_res_path: ./output/fce_r50_ctw/predicts_ctw.txt
+Architecture:
+  model_type: det
+  algorithm: FCE
+  Transform:
+  Backbone:
+    name: ResNet
+    layers: 50
+    dcn_stage: [False, True, True, True]
+    out_indices: [1,2,3]
+  Neck:
+    name: FCEFPN
+    in_channels: [512, 1024, 2048]
+    out_channels: 256
+    has_extra_convs: False
+    extra_stage: 0
+  Head:
+    name: FCEHead
+    in_channels: 256
+    scales: [8, 16, 32]
+    fourier_degree: 5
+Loss:
+  name: FCELoss
+  fourier_degree: 5
+  num_sample: 50
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    learning_rate: 0.0001
+  regularizer:
+    name: 'L2'
+    factor: 0
+PostProcess:
+  name: FCEPostProcess
+  scales: [8, 16, 32]
+  alpha: 1.0
+  beta: 1.0
+  fourier_degree: 5
+Metric:
+  name: DetFCEMetric
+  main_indicator: hmean
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /data/Dataset/OCR_det/ctw1500/imgs/
+    label_file_list: 
+      - /data/Dataset/OCR_det/ctw1500/imgs/training.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+          ignore_orientation: True
+      - DetLabelEncode: # Class handling label
+      - ColorJitter: 
+          brightness: 0.142
+          saturation: 0.5
+          contrast: 0.5
+      - RandomScaling: 
+      - RandomCropFlip:
+          crop_ratio: 0.5
+      - RandomCropPolyInstances:
+          crop_ratio: 0.8
+          min_side_ratio: 0.3
+      - RandomRotatePolyInstances:
+          rotate_ratio: 0.5
+          max_angle: 30
+          pad_with_fixed_color: False
+      - SquareResizePad:
+          target_size: 800
+          pad_ratio: 0.6
+      - IaaAugment:
+          augmenter_args:
+            - { 'type': Fliplr, 'args': { 'p': 0.5 } }
+      - FCENetTargets:
+          fourier_degree: 5
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'p3_maps', 'p4_maps', 'p5_maps'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 6
+    num_workers: 8
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /data/Dataset/OCR_det/ctw1500/imgs/
+    label_file_list:
+      - /data/Dataset/OCR_det/ctw1500/imgs/test.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+          ignore_orientation: True
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          # resize_long: 1280
+          rescale_img: [1080, 736]
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - Pad: 
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
\ No newline at end of file
--- a/ppocr/data/imaug/__init__.py
+++ b/ppocr/data/imaug/__init__.py
@@ -36,6 +36,9 @@ from .gen_table_mask import *
 from .vqa import *
+from .fce_aug import *
+from .fce_targets import FCENetTargets
 def transform(data, ops=None):
    """ transform """

--- a/ppocr/data/imaug/fce_aug.py
+++ b/ppocr/data/imaug/fce_aug.py
--- a/ppocr/data/imaug/fce_targets.py
+++ b/ppocr/data/imaug/fce_targets.py
--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
@@ -60,9 +60,14 @@ class DecodeImage(object):
 class NRTRDecodeImage(object):
    """ decode image """
-    def __init__(self, img_mode='RGB', channel_first=False, **kwargs):
+    def __init__(self,
+                 img_mode='RGB',
+                 channel_first=False,
+                 ignore_orientation=False,
+                 **kwargs):
        self.img_mode = img_mode
        self.channel_first = channel_first
+        self.ignore_orientation = ignore_orientation
    def __call__(self, data):
        img = data['image']
@@ -74,6 +79,10 @@ class NRTRDecodeImage(object):
                img) > 0, "invalid input 'img' in DecodeImage"
        img = np.frombuffer(img, dtype='uint8')
+        if self.ignore_orientation:
+            img = cv2.imdecode(img, cv2.IMREAD_IGNORE_ORIENTATION |
+                               cv2.IMREAD_COLOR)
+        else:
            img = cv2.imdecode(img, 1)
        if img is None:

--- a/ppocr/losses/__init__.py
+++ b/ppocr/losses/__init__.py
@@ -24,6 +24,7 @@ from .det_db_loss import DBLoss
 from .det_east_loss import EASTLoss
 from .det_sast_loss import SASTLoss
 from .det_pse_loss import PSELoss
+from .det_fce_loss import FCELoss
 # rec loss
 from .rec_ctc_loss import CTCLoss
@@ -55,9 +56,9 @@ from .vqa_token_layoutlm_loss import VQASerTokenLayoutLMLoss
 def build_loss(config):
    support_dict = [
-        'DBLoss', 'PSELoss', 'EASTLoss', 'SASTLoss', 'CTCLoss', 'ClsLoss',
+        'DBLoss', 'PSELoss', 'EASTLoss', 'SASTLoss', 'FCELoss', 'CTCLoss',
-        'AttentionLoss', 'SRNLoss', 'PGLoss', 'CombinedLoss', 'NRTRLoss',
+        'ClsLoss', 'AttentionLoss', 'SRNLoss', 'PGLoss', 'CombinedLoss',
-        'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss',
+        'NRTRLoss', 'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss',
        'VQASerTokenLayoutLMLoss', 'LossFromOutput'
    ]
    config = copy.deepcopy(config)

--- a/ppocr/losses/det_fce_loss.py
+++ b/ppocr/losses/det_fce_loss.py
+import numpy as np
+from paddle import nn
+import paddle
+import paddle.nn.functional as F
+from functools import partial
+def multi_apply(func, *args, **kwargs):
+    """Apply ``func`` across parallel iterables and regroup its outputs.
+    ``func`` is called once per position with one item from each iterable
+    in ``args`` (same semantics as the built-in ``map``); ``kwargs`` are
+    bound to every call. The per-call result tuples are then transposed, so
+    the i-th returned list collects the i-th output of every call.
+    Args:
+        func (Function): A function that returns a tuple of results.
+        *args: Iterables that supply the positional arguments of ``func``.
+        **kwargs: Keyword arguments passed unchanged to every call.
+    Returns:
+        tuple(list): One list per output of ``func``.
+    """
+    pfunc = partial(func, **kwargs) if kwargs else func
+    map_results = map(pfunc, *args)
+    return tuple(map(list, zip(*map_results)))
+class FCELoss(nn.Layer):
+    """The class for implementing FCENet loss
+    FCENet(CVPR2021): Fourier Contour Embedding for Arbitrary-shaped
+        Text Detection
+    [https://arxiv.org/abs/2104.10442]
+    The total loss is the sum, over all prediction levels, of a text-region
+    (tr) classification loss with OHEM, a text-center-line (tcl)
+    classification loss, and two smooth-L1 regression losses computed on
+    contour points reconstructed from the Fourier coefficients.
+    Args:
+        fourier_degree (int) : The maximum Fourier transform degree k.
+        num_sample (int) : The number of contour points sampled when
+            computing the regression loss. If it is too small, FCENet
+            tends to overfit.
+        ohem_ratio (float): the negative/positive ratio in OHEM.
+    """
+    def __init__(self, fourier_degree, num_sample, ohem_ratio=3.):
+        super().__init__()
+        self.fourier_degree = fourier_degree
+        self.num_sample = num_sample
+        self.ohem_ratio = ohem_ratio
+    def forward(self, preds, labels):
+        """Compute the FCENet loss over all prediction levels.
+        Args:
+            preds (dict): Must contain the key 'levels', a list with one
+                [cls_pred, reg_pred] pair per level.
+            labels (list): The first element is skipped; the remaining
+                three elements are the p3, p4 and p5 target maps.
+        Returns:
+            dict: Keys 'loss' (total), 'loss_text', 'loss_center',
+                'loss_reg_x' and 'loss_reg_y'.
+        """
+        assert isinstance(preds, dict)
+        preds = preds['levels']
+        p3_maps, p4_maps, p5_maps = labels[1:]
+        # Each target map carries 3 mask channels plus 2 * (2k + 1)
+        # Fourier coefficient channels, i.e. 4k + 5 channels in total.
+        assert p3_maps[0].shape[0] == 4 * self.fourier_degree + 5,\
+            'fourier degree not equal in FCEhead and FCEtarget'
+        # Stack the per-sample maps of every level into one tensor.
+        gts = [p3_maps, p4_maps, p5_maps]
+        for idx, maps in enumerate(gts):
+            gts[idx] = paddle.to_tensor(np.stack(maps))
+        # `losses` is a tuple of four lists (tr, tcl, reg_x, reg_y); each
+        # list holds one value per level.
+        losses = multi_apply(self.forward_single, preds, gts)
+        loss_tr = paddle.to_tensor(0.).astype('float32')
+        loss_tcl = paddle.to_tensor(0.).astype('float32')
+        loss_reg_x = paddle.to_tensor(0.).astype('float32')
+        loss_reg_y = paddle.to_tensor(0.).astype('float32')
+        loss_all = paddle.to_tensor(0.).astype('float32')
+        # Sum each loss type over the levels and accumulate the total.
+        for idx, loss in enumerate(losses):
+            loss_all += sum(loss)
+            if idx == 0:
+                loss_tr += sum(loss)
+            elif idx == 1:
+                loss_tcl += sum(loss)
+            elif idx == 2:
+                loss_reg_x += sum(loss)
+            else:
+                loss_reg_y += sum(loss)
+        results = dict(
+            loss=loss_all,
+            loss_text=loss_tr,
+            loss_center=loss_tcl,
+            loss_reg_x=loss_reg_x,
+            loss_reg_y=loss_reg_y, )
+        return results
+    def forward_single(self, pred, gt):
+        """Compute the four loss terms for a single prediction level.
+        Args:
+            pred (list): [cls_pred, reg_pred] of one level.
+            gt (tensor): The stacked target maps of the same level.
+        Returns:
+            tuple: (loss_tr, loss_tcl, loss_reg_x, loss_reg_y).
+        """
+        # Move axis 1 (channels) to the last position so that every
+        # spatial location becomes one row after the reshapes below.
+        cls_pred = paddle.transpose(pred[0], (0, 2, 3, 1))
+        reg_pred = paddle.transpose(pred[1], (0, 2, 3, 1))
+        gt = paddle.transpose(gt, (0, 2, 3, 1))
+        # Number of Fourier coefficients per axis: degrees -k .. k.
+        k = 2 * self.fourier_degree + 1
+        # Classification channels: first 2 -> tr logits, rest -> tcl logits.
+        tr_pred = paddle.reshape(cls_pred[:, :, :, :2], (-1, 2))
+        tcl_pred = paddle.reshape(cls_pred[:, :, :, 2:], (-1, 2))
+        # Regression channels: first k -> x_pred, next k -> y_pred.
+        x_pred = paddle.reshape(reg_pred[:, :, :, 0:k], (-1, k))
+        y_pred = paddle.reshape(reg_pred[:, :, :, k:2 * k], (-1, k))
+        # Target channels: 0 -> tr mask, 1 -> tcl mask, 2 -> train mask,
+        # then k channels for x_map and the remaining channels for y_map.
+        tr_mask = gt[:, :, :, :1].reshape([-1])
+        tcl_mask = gt[:, :, :, 1:2].reshape([-1])
+        train_mask = gt[:, :, :, 2:3].reshape([-1])
+        x_map = paddle.reshape(gt[:, :, :, 3:3 + k], (-1, k))
+        y_map = paddle.reshape(gt[:, :, :, 3 + k:], (-1, k))
+        # Positions that are both trainable and inside a text region.
+        tr_train_mask = (train_mask * tr_mask).astype('bool')
+        # Same mask repeated over the 2 logit columns for masked_select.
+        tr_train_mask2 = paddle.concat(
+            [tr_train_mask.unsqueeze(1), tr_train_mask.unsqueeze(1)], axis=1)
+        # tr loss
+        loss_tr = self.ohem(tr_pred, tr_mask, train_mask)
+        # tcl loss: cross entropy computed separately on the tr_train_mask
+        # positions and on its complement; the complement is weighted 0.5.
+        loss_tcl = paddle.to_tensor(0.).astype('float32')
+        tr_neg_mask = tr_train_mask.logical_not()
+        tr_neg_mask2 = paddle.concat(
+            [tr_neg_mask.unsqueeze(1), tr_neg_mask.unsqueeze(1)], axis=1)
+        if tr_train_mask.sum().item() > 0:
+            loss_tcl_pos = F.cross_entropy(
+                tcl_pred.masked_select(tr_train_mask2).reshape([-1, 2]),
+                tcl_mask.masked_select(tr_train_mask).astype('int64'))
+            loss_tcl_neg = F.cross_entropy(
+                tcl_pred.masked_select(tr_neg_mask2).reshape([-1, 2]),
+                tcl_mask.masked_select(tr_neg_mask).astype('int64'))
+            loss_tcl = loss_tcl_pos + 0.5 * loss_tcl_neg
+        # regression loss: only computed when at least one position is
+        # selected by tr_train_mask, otherwise both terms stay 0.
+        loss_reg_x = paddle.to_tensor(0.).astype('float32')
+        loss_reg_y = paddle.to_tensor(0.).astype('float32')
+        if tr_train_mask.sum().item() > 0:
+            # Per-position weight: mean of the tr and tcl mask values at
+            # the selected positions, shaped (-1, 1) to broadcast over the
+            # sampled contour points.
+            weight = (tr_mask.masked_select(tr_train_mask.astype('bool'))
+                      .astype('float32') + tcl_mask.masked_select(
+                          tr_train_mask.astype('bool')).astype('float32')) / 2
+            weight = weight.reshape([-1, 1])
+            # Compare contours in point space rather than coefficient space.
+            ft_x, ft_y = self.fourier2poly(x_map, y_map)
+            ft_x_pre, ft_y_pre = self.fourier2poly(x_pred, y_pred)
+            dim = ft_x.shape[1]
+            # tr_train_mask repeated over the `dim` sampled points.
+            tr_train_mask3 = paddle.concat(
+                [tr_train_mask.unsqueeze(1) for i in range(dim)], axis=1)
+            loss_reg_x = paddle.mean(weight * F.smooth_l1_loss(
+                ft_x_pre.masked_select(tr_train_mask3).reshape([-1, dim]),
+                ft_x.masked_select(tr_train_mask3).reshape([-1, dim]),
+                reduction='none'))
+            loss_reg_y = paddle.mean(weight * F.smooth_l1_loss(
+                ft_y_pre.masked_select(tr_train_mask3).reshape([-1, dim]),
+                ft_y.masked_select(tr_train_mask3).reshape([-1, dim]),
+                reduction='none'))
+        return loss_tr, loss_tcl, loss_reg_x, loss_reg_y
+    def ohem(self, predict, target, train_mask):
+        """Cross-entropy loss with online hard example mining (OHEM).
+        Args:
+            predict (tensor): Two-column logits, one row per position.
+            target (tensor): Flattened class mask for the same positions.
+            train_mask (tensor): Flattened mask of trainable positions.
+        Returns:
+            tensor: (positive loss sum + kept negative loss sum) divided by
+                (n_pos + n_neg).
+        """
+        pos = (target * train_mask).astype('bool')
+        neg = ((1 - target) * train_mask).astype('bool')
+        # Masks repeated over the 2 logit columns for masked_select.
+        pos2 = paddle.concat([pos.unsqueeze(1), pos.unsqueeze(1)], axis=1)
+        neg2 = paddle.concat([neg.unsqueeze(1), neg.unsqueeze(1)], axis=1)
+        n_pos = pos.astype('float32').sum()
+        if n_pos.item() > 0:
+            loss_pos = F.cross_entropy(
+                predict.masked_select(pos2).reshape([-1, 2]),
+                target.masked_select(pos).astype('int64'),
+                reduction='sum')
+            loss_neg = F.cross_entropy(
+                predict.masked_select(neg2).reshape([-1, 2]),
+                target.masked_select(neg).astype('int64'),
+                reduction='none')
+            # Keep at most ohem_ratio negatives per positive.
+            n_neg = min(
+                int(neg.astype('float32').sum().item()),
+                int(self.ohem_ratio * n_pos.astype('float32')))
+        else:
+            # No positives: fall back to a fixed budget of 100 negatives.
+            loss_pos = paddle.to_tensor(0.)
+            loss_neg = F.cross_entropy(
+                predict.masked_select(neg2).reshape([-1, 2]),
+                target.masked_select(neg).astype('int64'),
+                reduction='none')
+            n_neg = 100
+        # Keep only the n_neg largest negative losses.
+        if len(loss_neg) > n_neg:
+            loss_neg, _ = paddle.topk(loss_neg, n_neg)
+        return (loss_pos + loss_neg.sum()) / (n_pos + n_neg).astype('float32')
+    def fourier2poly(self, real_maps, imag_maps):
+        """Transform Fourier coefficient maps to polygon maps.
+        Args:
+            real_maps (tensor): A map composed of the real parts of the
+                Fourier coefficients, whose shape is (-1, 2k+1)
+            imag_maps (tensor):A map composed of the imag parts of the
+                Fourier coefficients, whose shape is (-1, 2k+1)
+        Returns
+            x_maps (tensor): A map composed of the x value of the polygon
+                represented by n sample points (xn, yn), whose shape is (-1, n)
+            y_maps (tensor): A map composed of the y value of the polygon
+                represented by n sample points (xn, yn), whose shape is (-1, n)
+        """
+        # Column vector of the degrees -k .. k, shape (2k+1, 1).
+        k_vect = paddle.arange(
+            -self.fourier_degree, self.fourier_degree + 1,
+            dtype='float32').reshape([-1, 1])
+        # Row vector of the sample indices 0 .. num_sample-1, shape (1, n).
+        i_vect = paddle.arange(
+            0, self.num_sample, dtype='float32').reshape([1, -1])
+        # Phase angle 2*pi*k*i/n for every (degree, sample) pair.
+        transform_matrix = 2 * np.pi / self.num_sample * paddle.matmul(k_vect,
+                                                                       i_vect)
+        x1 = paddle.einsum('ak, kn-> an', real_maps,
+                           paddle.cos(transform_matrix))
+        x2 = paddle.einsum('ak, kn-> an', imag_maps,
+                           paddle.sin(transform_matrix))
+        y1 = paddle.einsum('ak, kn-> an', real_maps,
+                           paddle.sin(transform_matrix))
+        y2 = paddle.einsum('ak, kn-> an', imag_maps,
+                           paddle.cos(transform_matrix))
+        # x = real*cos - imag*sin, y = real*sin + imag*cos.
+        x_maps = x1 - x2
+        y_maps = y1 + y2
+        return x_maps, y_maps
--- a/ppocr/metrics/__init__.py
+++ b/ppocr/metrics/__init__.py
@@ -21,7 +21,7 @@ import copy
 __all__ = ["build_metric"]
-from .det_metric import DetMetric
+from .det_metric import DetMetric, DetFCEMetric
 from .rec_metric import RecMetric
 from .cls_metric import ClsMetric
 from .e2e_metric import E2EMetric
@@ -34,7 +34,7 @@ from .vqa_token_re_metric import VQAReTokenMetric
 def build_metric(config):
    support_dict = [
-        "DetMetric", "RecMetric", "ClsMetric", "E2EMetric",
+        "DetMetric", "DetFCEMetric", "RecMetric", "ClsMetric", "E2EMetric",
        "DistillationMetric", "TableMetric", 'KIEMetric', 'VQASerTokenMetric',
        'VQAReTokenMetric'
    ]

--- a/ppocr/metrics/det_metric.py
+++ b/ppocr/metrics/det_metric.py
@@ -16,7 +16,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-__all__ = ['DetMetric']
+__all__ = ['DetMetric', 'DetFCEMetric']
 from .eval_det_iou import DetectionIoUEvaluator
@@ -55,7 +55,6 @@ class DetMetric(object):
            result = self.evaluator.evaluate_image(gt_info_list, det_info_list)
            self.results.append(result)
    def get_metric(self):
        """
        return metrics {
@@ -71,3 +70,85 @@ class DetMetric(object):
    def reset(self):
        self.results = []  # clear results
+class DetFCEMetric(object):
+    """Detection metric that evaluates predictions at several score thresholds.
+    Per-image results are collected separately for the thresholds
+    0.3, 0.4, ..., 0.9; the reported 'hmean' is the best hmean among them.
+    """
+    def __init__(self, main_indicator='hmean', **kwargs):
+        self.evaluator = DetectionIoUEvaluator()
+        self.main_indicator = main_indicator
+        self.reset()
+    def __call__(self, preds, batch, **kwargs):
+        '''
+        Accumulate the evaluation results of one batch.
+        batch: a list produced by dataloaders; only two entries are read:
+            batch[2]: the ground-truth polygons of every image.
+            batch[3]: the ignore tags, one flag per ground-truth polygon.
+        preds: a list of dict produced by post process, one per image, with
+            'points': the detected polygons.
+            'scores': the confidence score of each detected polygon.
+        '''
+        gt_polygons_batch, ignore_tags_batch = batch[2], batch[3]
+        for pred, gt_polygons, ignore_tags in zip(preds, gt_polygons_batch,
+                                                  ignore_tags_batch):
+            # prepare gt
+            gt_info_list = []
+            for polygon, ignore_tag in zip(gt_polygons, ignore_tags):
+                gt_info_list.append({
+                    'points': polygon,
+                    'text': '',
+                    'ignore': ignore_tag
+                })
+            # prepare det
+            det_info_list = []
+            for polygon, score in zip(pred['points'], pred['scores']):
+                det_info_list.append({
+                    'points': polygon,
+                    'text': '',
+                    'score': score
+                })
+            # evaluate once per threshold, keeping detections at or above it
+            for score_thr, per_image_results in self.results.items():
+                kept = [
+                    det for det in det_info_list if det['score'] >= score_thr
+                ]
+                per_image_results.append(
+                    self.evaluator.evaluate_image(gt_info_list, kept))
+    def get_metric(self):
+        """
+        return metrics {'hmean': 0,
+            '\\n thr 0.3':'precision:0 recall:0 hmean:0',
+            '\\n thr 0.4':'precision:0 recall:0 hmean:0',
+            '\\n thr 0.5':'precision:0 recall:0 hmean:0',
+            '\\n thr 0.6':'precision:0 recall:0 hmean:0',
+            '\\n thr 0.7':'precision:0 recall:0 hmean:0',
+            '\\n thr 0.8':'precision:0 recall:0 hmean:0',
+            '\\n thr 0.9':'precision:0 recall:0 hmean:0',
+            }
+        'hmean' is the maximum hmean over all thresholds. The accumulated
+        results are cleared before returning.
+        """
+        summary = {}
+        best_hmean = 0
+        for score_thr, per_image_results in self.results.items():
+            combined = self.evaluator.combine_results(per_image_results)
+            line = 'precision:{:.5f} recall:{:.5f} hmean:{:.5f}'.format(
+                combined['precision'], combined['recall'], combined['hmean'])
+            summary['\n thr {}'.format(score_thr)] = line
+            best_hmean = max(best_hmean, combined['hmean'])
+        summary['hmean'] = best_hmean
+        self.reset()
+        return summary
+    def reset(self):
+        # one empty result list per score threshold
+        self.results = {
+            score_thr: []
+            for score_thr in (0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
+        }
--- a/ppocr/modeling/backbones/det_resnet_vd.py
+++ b/ppocr/modeling/backbones/det_resnet_vd.py
@@ -21,9 +21,82 @@ from paddle import ParamAttr
 import paddle.nn as nn
 import paddle.nn.functional as F
+from paddle.vision.ops import DeformConv2D
+from paddle.regularizer import L2Decay
+from paddle.nn.initializer import Normal, Constant, XavierUniform
 __all__ = ["ResNet"]
+class DeformableConvV2(nn.Layer):
+    """Deformable convolution whose offsets and mask are predicted from the input.
+    A regular convolution (`conv_offset`) predicts, per location,
+    2 * kernel_size**2 * groups offset channels and kernel_size**2 * groups
+    mask channels; both are fed to `DeformConv2D` together with the input.
+    NOTE(review): the `padding` argument is accepted but never read; both
+    convolutions derive their padding from `kernel_size` (and `dilation`
+    for the deformable one).
+    Args:
+        in_channels (int): Number of input channels.
+        out_channels (int): Number of output channels.
+        kernel_size (int): Kernel size of both convolutions.
+        stride (int): Stride of both convolutions.
+        padding (int): Unused, see the note above.
+        dilation (int): Dilation of the deformable convolution.
+        groups (int): Passed to `DeformConv2D` as `deformable_groups`.
+        weight_attr: Weight attribute of the deformable convolution.
+        bias_attr: If truthy, the deformable convolution gets a
+            zero-initialized bias; otherwise it has no bias.
+        lr_scale (float): Learning-rate scale of the offset-conv bias.
+        regularizer: Regularizer of the offset-conv bias.
+        skip_quant (bool): If True, sets `skip_quant` on the offset conv.
+        dcn_bias_regularizer: Regularizer of the deformable-conv bias.
+        dcn_bias_lr_scale (float): Learning-rate scale of that bias.
+    """
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 weight_attr=None,
+                 bias_attr=None,
+                 lr_scale=1,
+                 regularizer=None,
+                 skip_quant=False,
+                 dcn_bias_regularizer=L2Decay(0.),
+                 dcn_bias_lr_scale=2.):
+        super(DeformableConvV2, self).__init__()
+        # Channel counts used to split the offset-conv output in forward().
+        self.offset_channel = 2 * kernel_size**2 * groups
+        self.mask_channel = kernel_size**2 * groups
+        if bias_attr:
+            # in FCOS-DCN head, specifically need learning_rate and regularizer
+            dcn_bias_attr = ParamAttr(
+                initializer=Constant(value=0),
+                regularizer=dcn_bias_regularizer,
+                learning_rate=dcn_bias_lr_scale)
+        else:
+            # in ResNet backbone, do not need bias
+            dcn_bias_attr = False
+        self.conv_dcn = DeformConv2D(
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride=stride,
+            padding=(kernel_size - 1) // 2 * dilation,
+            dilation=dilation,
+            deformable_groups=groups,
+            weight_attr=weight_attr,
+            bias_attr=dcn_bias_attr)
+        # Only attach lr scale / regularizer to the offset bias when they
+        # differ from the defaults.
+        if lr_scale == 1 and regularizer is None:
+            offset_bias_attr = ParamAttr(initializer=Constant(0.))
+        else:
+            offset_bias_attr = ParamAttr(
+                initializer=Constant(0.),
+                learning_rate=lr_scale,
+                regularizer=regularizer)
+        # Output channels = offset_channel + mask_channel. Weights and bias
+        # are zero-initialized, so the initial offsets are all 0 and the
+        # initial mask is sigmoid(0) = 0.5.
+        self.conv_offset = nn.Conv2D(
+            in_channels,
+            groups * 3 * kernel_size**2,
+            kernel_size,
+            stride=stride,
+            padding=(kernel_size - 1) // 2,
+            weight_attr=ParamAttr(initializer=Constant(0.0)),
+            bias_attr=offset_bias_attr)
+        if skip_quant:
+            self.conv_offset.skip_quant = True
+    def forward(self, x):
+        """Predict offsets and mask from `x`, then apply the deformable conv."""
+        offset_mask = self.conv_offset(x)
+        # Split along the channel axis into the offset and mask parts.
+        offset, mask = paddle.split(
+            offset_mask,
+            num_or_sections=[self.offset_channel, self.mask_channel],
+            axis=1)
+        # Squash the mask into (0, 1) before it modulates the samples.
+        mask = F.sigmoid(mask)
+        y = self.conv_dcn(x, offset, mask=mask)
+        return y
 class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
@@ -32,12 +105,14 @@ class ConvBNLayer(nn.Layer):
                 stride=1,
                 groups=1,
                 is_vd_mode=False,
-                 act=None):
+                 act=None,
+                 is_dcn=False):
        super(ConvBNLayer, self).__init__()
        self.is_vd_mode = is_vd_mode
        self._pool2d_avg = nn.AvgPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
+        if not is_dcn:
            self._conv = nn.Conv2D(
                in_channels=in_channels,
                out_channels=out_channels,
@@ -46,6 +121,15 @@ class ConvBNLayer(nn.Layer):
                padding=(kernel_size - 1) // 2,
                groups=groups,
                bias_attr=False)
+        else:
+            self._conv = DeformableConvV2(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=kernel_size,
+                stride=stride,
+                padding=(kernel_size - 1) // 2,
+                groups=2,  #groups,
+                bias_attr=False)
        self._batch_norm = nn.BatchNorm(out_channels, act=act)
    def forward(self, inputs):
@@ -57,12 +141,14 @@ class ConvBNLayer(nn.Layer):
 class BottleneckBlock(nn.Layer):
-    def __init__(self,
+    def __init__(
+            self,
            in_channels,
            out_channels,
            stride,
            shortcut=True,
-                 if_first=False):
+            if_first=False,
+            is_dcn=False, ):
        super(BottleneckBlock, self).__init__()
        self.conv0 = ConvBNLayer(
@@ -75,7 +161,8 @@ class BottleneckBlock(nn.Layer):
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
-            act='relu')
+            act='relu',
+            is_dcn=is_dcn)
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels * 4,
@@ -152,7 +239,12 @@ class BasicBlock(nn.Layer):
 class ResNet(nn.Layer):
-    def __init__(self, in_channels=3, layers=50, **kwargs):
+    def __init__(self,
+                 in_channels=3,
+                 layers=50,
+                 dcn_stage=None,
+                 out_indices=None,
+                 **kwargs):
        super(ResNet, self).__init__()
        self.layers = layers
@@ -175,6 +267,13 @@ class ResNet(nn.Layer):
                        1024] if layers >= 50 else [64, 64, 128, 256]
        num_filters = [64, 128, 256, 512]
+        self.dcn_stage = dcn_stage if dcn_stage is not None else [
+            False, False, False, False
+        ]
+        self.out_indices = out_indices if out_indices is not None else [
+            0, 1, 2, 3
+        ]
        self.conv1_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=32,
@@ -201,6 +300,7 @@ class ResNet(nn.Layer):
            for block in range(len(depth)):
                block_list = []
                shortcut = False
+                is_dcn = self.dcn_stage[block]
                for i in range(depth[block]):
                    bottleneck_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
@@ -210,15 +310,18 @@ class ResNet(nn.Layer):
                            out_channels=num_filters[block],
                            stride=2 if i == 0 and block != 0 else 1,
                            shortcut=shortcut,
-                            if_first=block == i == 0))
+                            if_first=block == i == 0,
+                            is_dcn=is_dcn))
                    shortcut = True
                    block_list.append(bottleneck_block)
+                if block in self.out_indices:
                    self.out_channels.append(num_filters[block] * 4)
                self.stages.append(nn.Sequential(*block_list))
        else:
            for block in range(len(depth)):
                block_list = []
                shortcut = False
+                # is_dcn = self.dcn_stage[block]
                for i in range(depth[block]):
                    basic_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
@@ -231,6 +334,7 @@ class ResNet(nn.Layer):
                            if_first=block == i == 0))
                    shortcut = True
                    block_list.append(basic_block)
+                if block in self.out_indices:
                    self.out_channels.append(num_filters[block])
                self.stages.append(nn.Sequential(*block_list))
@@ -240,7 +344,8 @@ class ResNet(nn.Layer):
        y = self.conv1_3(y)
        y = self.pool2d_max(y)
        out = []
-        for block in self.stages:
+        for i, block in enumerate(self.stages):
            y = block(y)
+            if i in self.out_indices:
                out.append(y)
        return out
--- a/ppocr/modeling/heads/__init__.py
+++ b/ppocr/modeling/heads/__init__.py
@@ -21,6 +21,7 @@ def build_head(config):
    from .det_east_head import EASTHead
    from .det_sast_head import SASTHead
    from .det_pse_head import PSEHead
+    from .det_fce_head import FCEHead
    from .e2e_pg_head import PGHead
    # rec head
@@ -40,8 +41,8 @@ def build_head(config):
    from .table_att_head import TableAttentionHead
    support_dict = [
-        'DBHead', 'PSEHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead',
+        'DBHead', 'PSEHead', 'FCEHead', 'EASTHead', 'SASTHead', 'CTCHead',
-        'AttentionHead', 'SRNHead', 'PGHead', 'Transformer',
+        'ClsHead', 'AttentionHead', 'SRNHead', 'PGHead', 'Transformer',
        'TableAttentionHead', 'SARHead', 'AsterHead', 'SDMGRHead'
    ]

--- a/ppocr/modeling/heads/det_fce_head.py
+++ b/ppocr/modeling/heads/det_fce_head.py
+from paddle import nn
+from paddle import ParamAttr
+import paddle.nn.functional as F
+from paddle.nn.initializer import Normal
+import paddle
+from functools import partial
+def multi_apply(func, *args, **kwargs):
+    """Call a function over parallel inputs and group its outputs by kind.
+    Note:
+        ``func`` is invoked once per position, taking one element from
+        each iterable in ``args``. Every call is expected to return a
+        tuple; the tuples are transposed so that each returned list
+        gathers the same output slot across all calls.
+    Args:
+        func (Function): The function applied to each group of arguments.
+        *args: Iterables providing the positional arguments.
+        **kwargs: Keyword arguments forwarded to every call.
+    Returns:
+        tuple(list): One list per output slot of ``func``.
+    """
+    bound = func if not kwargs else partial(func, **kwargs)
+    columns = zip(*map(bound, *args))
+    return tuple(list(column) for column in columns)
+class FCEHead(nn.Layer):
+    """The class for implementing FCENet head.
+    FCENet(CVPR2021): Fourier Contour Embedding for Arbitrary-shaped Text
+    Detection.
+    [https://arxiv.org/abs/2104.10442]
+    The same two 3x3 convolutions are applied to every input feature level:
+    one outputs 4 classification channels (2 text-region logits followed by
+    2 text-center-line logits), the other outputs
+    2 * (2 * fourier_degree + 1) regression channels.
+    Args:
+        in_channels (int): The number of input channels.
+        scales (list[int]) : The scale of each layer.
+        fourier_degree (int) : The maximum Fourier transform degree k.
+    """
+    def __init__(self, in_channels, scales, fourier_degree=5):
+        super().__init__()
+        assert isinstance(in_channels, int)
+        self.downsample_ratio = 1.0
+        self.in_channels = in_channels
+        self.scales = scales
+        self.fourier_degree = fourier_degree
+        self.out_channels_cls = 4
+        self.out_channels_reg = (2 * self.fourier_degree + 1) * 2
+        # mean/std are given as plain Python floats, which is what the
+        # Normal initializer takes; no tensors are created for them.
+        self.out_conv_cls = nn.Conv2D(
+            in_channels=self.in_channels,
+            out_channels=self.out_channels_cls,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            groups=1,
+            weight_attr=ParamAttr(
+                name='cls_weights',
+                initializer=Normal(
+                    mean=0., std=0.01)),
+            bias_attr=True)
+        self.out_conv_reg = nn.Conv2D(
+            in_channels=self.in_channels,
+            out_channels=self.out_channels_reg,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            groups=1,
+            weight_attr=ParamAttr(
+                name='reg_weights',
+                initializer=Normal(
+                    mean=0., std=0.01)),
+            bias_attr=True)
+    def forward(self, feats, targets=None):
+        """Run the head on every feature level.
+        Args:
+            feats (list[tensor]): One feature map per level.
+            targets: Unused; kept for interface compatibility.
+        Returns:
+            dict: In training mode, {'levels': [[cls, reg], ...]} with the
+                raw outputs of every level. Otherwise one entry
+                'level_<i>' per level holding the softmaxed text-region
+                scores, the softmaxed center-line scores and the raw
+                regression output concatenated along axis 1.
+        """
+        cls_res, reg_res = multi_apply(self.forward_single, feats)
+        outs = {}
+        if self.training:
+            # The loss consumes raw logits, so no softmax is applied here.
+            outs['levels'] = [[cls, reg]
+                              for cls, reg in zip(cls_res, reg_res)]
+            return outs
+        for i, (cls, reg) in enumerate(zip(cls_res, reg_res)):
+            tr_pred = F.softmax(cls[:, 0:2, :, :], axis=1)
+            tcl_pred = F.softmax(cls[:, 2:, :, :], axis=1)
+            outs['level_{}'.format(i)] = paddle.concat(
+                [tr_pred, tcl_pred, reg], axis=1)
+        return outs
+    def forward_single(self, x):
+        """Apply the classification and regression convs to one level."""
+        cls_predict = self.out_conv_cls(x)
+        reg_predict = self.out_conv_reg(x)
+        return cls_predict, reg_predict
--- a/ppocr/modeling/necks/__init__.py
+++ b/ppocr/modeling/necks/__init__.py
@@ -23,7 +23,11 @@ def build_neck(config):
    from .pg_fpn import PGFPN
    from .table_fpn import TableFPN
    from .fpn import FPN
-    support_dict = ['FPN','DBFPN', 'EASTFPN', 'SASTFPN', 'SequenceEncoder', 'PGFPN', 'TableFPN']
+    from .fce_fpn import FCEFPN
+    support_dict = [
+        'FPN', 'FCEFPN', 'DBFPN', 'EASTFPN', 'SASTFPN', 'SequenceEncoder',
+        'PGFPN', 'TableFPN'
+    ]
    module_name = config.pop('name')
    assert module_name in support_dict, Exception('neck only support {}'.format(

--- a/ppocr/modeling/necks/fce_fpn.py
+++ b/ppocr/modeling/necks/fce_fpn.py
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.nn.initializer import XavierUniform
+from paddle.nn.initializer import Normal
+from paddle.regularizer import L2Decay
+__all__ = ['FCEFPN']
+class ConvNormLayer(nn.Layer):
+    """Bias-free 2-D convolution followed by a normalization layer.
+    Args:
+        ch_in (int): Number of input channels of the convolution.
+        ch_out (int): Number of output channels of the convolution.
+        filter_size (int): Kernel size; padding is ``(filter_size - 1) // 2``.
+        stride (int): Stride of the convolution.
+        groups (int): Number of convolution groups.
+        norm_type (str): One of ``'bn'``, ``'sync_bn'`` or ``'gn'``.
+        norm_decay (float|None): L2 decay factor applied to the normalization
+            parameters; ``None`` disables the regularizer.
+        norm_groups (int): Group count used when ``norm_type`` is ``'gn'``.
+        lr_scale (float): Accepted but not used by this implementation.
+        freeze_norm (bool): If True, the normalization parameters are given
+            a learning rate of 0.
+        initializer: Weight initializer of the convolution.
+    """
+    def __init__(self,
+                 ch_in,
+                 ch_out,
+                 filter_size,
+                 stride,
+                 groups=1,
+                 norm_type='bn',
+                 norm_decay=0.,
+                 norm_groups=32,
+                 lr_scale=1.,
+                 freeze_norm=False,
+                 initializer=Normal(
+                     mean=0., std=0.01)):
+        super(ConvNormLayer, self).__init__()
+        assert norm_type in ['bn', 'sync_bn', 'gn']
+        same_padding = (filter_size - 1) // 2
+        conv_weight_attr = ParamAttr(initializer=initializer, learning_rate=1.)
+        self.conv = nn.Conv2D(
+            in_channels=ch_in,
+            out_channels=ch_out,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=same_padding,
+            groups=groups,
+            weight_attr=conv_weight_attr,
+            bias_attr=False)
+        norm_lr = 0. if freeze_norm else 1.
+        def make_norm_attr():
+            # Each normalization parameter gets its own attribute object.
+            decay = None if norm_decay is None else L2Decay(norm_decay)
+            return ParamAttr(learning_rate=norm_lr, regularizer=decay)
+        scale_attr = make_norm_attr()
+        shift_attr = make_norm_attr()
+        batch_norms = {'bn': nn.BatchNorm2D, 'sync_bn': nn.SyncBatchNorm}
+        if norm_type in batch_norms:
+            self.norm = batch_norms[norm_type](
+                ch_out, weight_attr=scale_attr, bias_attr=shift_attr)
+        elif norm_type == 'gn':
+            self.norm = nn.GroupNorm(
+                num_groups=norm_groups,
+                num_channels=ch_out,
+                weight_attr=scale_attr,
+                bias_attr=shift_attr)
+    def forward(self, inputs):
+        """Return ``norm(conv(inputs))``."""
+        return self.norm(self.conv(inputs))
+class FCEFPN(nn.Layer):
+    """
+    Feature Pyramid Network, see https://arxiv.org/abs/1612.03144
+    Args:
+        in_channels (list[int]): input channels of each level; the last
+            entry is treated as the deepest (res5) stage
+        out_channels (int): output channel number shared by every level
+        spatial_scales (list[float]): the spatial scales between input feature
+            maps and original input image; one halved entry is appended per
+            extra stage before the list is stored on the instance
+        has_extra_convs (bool): whether to add extra conv to the last level.
+            default False
+        extra_stage (int): the number of extra stages added to the last level.
+            default 1
+        use_c5 (bool): Whether to use c5 as the input of extra stage,
+            otherwise p5 is used. default True
+        norm_type (string|None): The normalization type in FPN module. If
+            norm_type is None, norm will not be used after conv and if
+            norm_type is string, bn, gn, sync_bn are available. default None
+        norm_decay (float): weight decay for normalization layer weights.
+            default 0.
+        freeze_norm (bool): whether to freeze normalization layer.
+            default False
+        relu_before_extra_convs (bool): whether to apply relu to the input of
+            the second and later extra convs. default True
+    """
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
+                 has_extra_convs=False,
+                 extra_stage=1,
+                 use_c5=True,
+                 norm_type=None,
+                 norm_decay=0.,
+                 freeze_norm=False,
+                 relu_before_extra_convs=True):
+        super(FCEFPN, self).__init__()
+        self.out_channels = out_channels
+        # Copy first so the instance never aliases the shared default list
+        # (which happened when extra_stage was 0).
+        spatial_scales = list(spatial_scales)
+        for _ in range(extra_stage):
+            spatial_scales.append(spatial_scales[-1] / 2.)
+        self.spatial_scales = spatial_scales
+        self.has_extra_convs = has_extra_convs
+        self.extra_stage = extra_stage
+        self.use_c5 = use_c5
+        self.relu_before_extra_convs = relu_before_extra_convs
+        self.norm_type = norm_type
+        self.norm_decay = norm_decay
+        self.freeze_norm = freeze_norm
+        self.lateral_convs = []
+        self.fpn_convs = []
+        fan = out_channels * 3 * 3
+        # stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone
+        # 0 <= st_stage < ed_stage <= 3
+        st_stage = 4 - len(in_channels)
+        ed_stage = st_stage + len(in_channels) - 1
+        # 1x1 lateral convs, created first so sublayer order is unchanged.
+        for i in range(st_stage, ed_stage + 1):
+            if i == 3:
+                lateral_name = 'fpn_inner_res5_sum'
+            else:
+                lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
+            in_c = in_channels[i - st_stage]
+            self.lateral_convs.append(
+                self._make_conv(
+                    lateral_name, in_c, kernel_size=1, stride=1, fan_out=in_c))
+        # 3x3 output convs, one per level.
+        for i in range(st_stage, ed_stage + 1):
+            fpn_name = 'fpn_res{}_sum'.format(i + 2)
+            self.fpn_convs.append(
+                self._make_conv(
+                    fpn_name, out_channels, kernel_size=3, stride=1,
+                    fan_out=fan))
+        # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
+        if self.has_extra_convs:
+            for i in range(self.extra_stage):
+                lvl = ed_stage + 1 + i
+                if i == 0 and self.use_c5:
+                    in_c = in_channels[-1]
+                else:
+                    in_c = out_channels
+                extra_fpn_name = 'fpn_{}'.format(lvl + 2)
+                self.fpn_convs.append(
+                    self._make_conv(
+                        extra_fpn_name, in_c, kernel_size=3, stride=2,
+                        fan_out=fan))
+    def _make_conv(self, name, ch_in, kernel_size, stride, fan_out):
+        """Create, register and return one conv (plus norm if configured).
+        The layer maps ``ch_in`` to ``self.out_channels`` channels, uses
+        padding ``(kernel_size - 1) // 2`` and Xavier-uniform weights with
+        the given ``fan_out``. It is registered under ``name``.
+        """
+        initializer = XavierUniform(fan_out=fan_out)
+        if self.norm_type is not None:
+            layer = ConvNormLayer(
+                ch_in=ch_in,
+                ch_out=self.out_channels,
+                filter_size=kernel_size,
+                stride=stride,
+                norm_type=self.norm_type,
+                norm_decay=self.norm_decay,
+                freeze_norm=self.freeze_norm,
+                initializer=initializer)
+        else:
+            layer = nn.Conv2D(
+                in_channels=ch_in,
+                out_channels=self.out_channels,
+                kernel_size=kernel_size,
+                stride=stride,
+                padding=(kernel_size - 1) // 2,
+                weight_attr=ParamAttr(initializer=initializer))
+        return self.add_sublayer(name, layer)
+    @classmethod
+    def from_config(cls, cfg, input_shape):
+        """Derive constructor kwargs from backbone output descriptions.
+        Each item of ``input_shape`` must expose ``channels`` and ``stride``.
+        """
+        return {
+            'in_channels': [i.channels for i in input_shape],
+            'spatial_scales': [1.0 / i.stride for i in input_shape],
+        }
+    def forward(self, body_feats):
+        """Fuse the backbone features top-down and return the pyramid levels.
+        Args:
+            body_feats (list): backbone feature maps, deepest level last.
+        Returns:
+            list: one output per input level, followed by the extra levels
+                when ``extra_stage > 0``.
+        """
+        num_levels = len(body_feats)
+        laterals = [
+            self.lateral_convs[i](feat) for i, feat in enumerate(body_feats)
+        ]
+        # Top-down pathway: add each upsampled level to the one below it.
+        for lvl in range(num_levels - 1, 0, -1):
+            upsample = F.interpolate(
+                laterals[lvl],
+                scale_factor=2.,
+                mode='nearest', )
+            laterals[lvl - 1] += upsample
+        fpn_output = [
+            self.fpn_convs[lvl](laterals[lvl]) for lvl in range(num_levels)
+        ]
+        if self.extra_stage > 0:
+            # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN)
+            if not self.has_extra_convs:
+                assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has not extra convs'
+                fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
+            # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
+            else:
+                if self.use_c5:
+                    extra_source = body_feats[-1]
+                else:
+                    extra_source = fpn_output[-1]
+                fpn_output.append(self.fpn_convs[num_levels](extra_source))
+                for i in range(1, self.extra_stage):
+                    extra_in = fpn_output[-1]
+                    if self.relu_before_extra_convs:
+                        extra_in = F.relu(extra_in)
+                    fpn_output.append(self.fpn_convs[num_levels + i](extra_in))
+        return fpn_output
--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
@@ -24,6 +24,7 @@ __all__ = ['build_post_process']
 from .db_postprocess import DBPostProcess, DistillationDBPostProcess
 from .east_postprocess import EASTPostProcess
 from .sast_postprocess import SASTPostProcess
+from .fce_postprocess import FCEPostProcess
 from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, \
    TableLabelDecode, NRTRLabelDecode, SARLabelDecode, SEEDLabelDecode
 from .cls_postprocess import ClsPostProcess
@@ -34,9 +35,9 @@ from .vqa_token_re_layoutlm_postprocess import VQAReTokenLayoutLMPostProcess
 def build_post_process(config, global_config=None):
    support_dict = [
-        'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode',
+        'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'FCEPostProcess',
-        'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess',
+        'CTCLabelDecode', 'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode',
-        'DistillationCTCLabelDecode', 'TableLabelDecode',
+        'PGPostProcess', 'DistillationCTCLabelDecode', 'TableLabelDecode',
        'DistillationDBPostProcess', 'NRTRLabelDecode', 'SARLabelDecode',
        'SEEDLabelDecode', 'VQASerTokenLayoutLMPostProcess',
        'VQAReTokenLayoutLMPostProcess'

--- a/ppocr/postprocess/fce_postprocess.py
+++ b/ppocr/postprocess/fce_postprocess.py
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import numpy as np
+import cv2
+import paddle
+from numpy.fft import ifft
+import Polygon as plg
+def points2polygon(points):
+    """Build one polygon object from a flat sequence of point coordinates.
+    Args:
+        points (ndarray or list): 2k values describing k points; at least
+            8 values (4 points) are required.
+    Returns:
+        polygon (Polygon): A polygon object.
+    """
+    coords = np.array(points) if isinstance(points, list) else points
+    assert isinstance(coords, np.ndarray)
+    assert (coords.size % 2 == 0) and (coords.size >= 8)
+    # One row per point.
+    return plg.Polygon(coords.reshape([-1, 2]))
+def poly_intersection(poly_det, poly_gt):
+    """Intersect two polygons.
+    Args:
+        poly_det (Polygon): A polygon predicted by detector.
+        poly_gt (Polygon): A gt polygon.
+    Returns:
+        tuple: ``(intersection_area, poly_inter)`` where ``poly_inter`` is
+            ``poly_det & poly_gt`` and the area is 0 when that result is
+            empty.
+    """
+    assert isinstance(poly_det, plg.Polygon)
+    assert isinstance(poly_gt, plg.Polygon)
+    overlap = poly_det & poly_gt
+    overlap_area = overlap.area() if len(overlap) != 0 else 0
+    return overlap_area, overlap
+def poly_union(poly_det, poly_gt):
+    """Compute the area covered by the union of two polygons.
+    Args:
+        poly_det (Polygon): A polygon predicted by detector.
+        poly_gt (Polygon): A gt polygon.
+    Returns:
+        union_area (float): Sum of both areas minus their intersection area.
+    """
+    assert isinstance(poly_det, plg.Polygon)
+    assert isinstance(poly_gt, plg.Polygon)
+    summed_area = poly_det.area() + poly_gt.area()
+    overlap_area = poly_intersection(poly_det, poly_gt)[0]
+    return summed_area - overlap_area
+def valid_boundary(x, with_score=True):
+    """Check that a flat boundary has a plausible length.
+    Args:
+        x (sequence): The boundary values.
+        with_score (bool): Whether ``x`` is expected to end with a score.
+    Returns:
+        bool: True when ``x`` has at least 8 entries and its length is odd
+            with a score or even without one.
+    """
+    num = len(x)
+    if num < 8:
+        return False
+    has_trailing_value = num % 2 == 1
+    return has_trailing_value == bool(with_score)
+def boundary_iou(src, target):
+    """Compute the IOU of two score-free boundaries.
+    Args:
+       src (list): Source boundary.
+       target (list): Target boundary.
+    Returns:
+       iou (float): The iou between two boundaries.
+    """
+    for boundary in (src, target):
+        assert valid_boundary(boundary, False)
+    return poly_iou(points2polygon(src), points2polygon(target))
+def poly_iou(poly_det, poly_gt):
+    """Compute intersection-over-union of two polygons.
+    Args:
+        poly_det (Polygon): A polygon predicted by detector.
+        poly_gt (Polygon): A gt polygon.
+    Returns:
+        iou (float): The IOU between two polygons; 0.0 when the union area
+            is zero.
+    """
+    assert isinstance(poly_det, plg.Polygon)
+    assert isinstance(poly_gt, plg.Polygon)
+    overlap_area = poly_intersection(poly_det, poly_gt)[0]
+    total_area = poly_union(poly_det, poly_gt)
+    return overlap_area / total_area if total_area != 0 else 0.0
+def poly_nms(polygons, threshold):
+    """Greedy non-maximum suppression on scored polygons.
+    Args:
+        polygons (list): Flat boundaries whose last entry is the score.
+        threshold (float): A candidate is dropped when its IOU with an
+            already kept polygon is greater than this value.
+    Returns:
+        list: The kept polygons (as lists), highest score first.
+    """
+    assert isinstance(polygons, list)
+    ranked = np.array(sorted(polygons, key=lambda x: x[-1]))
+    remaining = list(range(ranked.shape[0]))
+    survivors = []
+    while remaining:
+        # Scores ascend, so the best remaining candidate is the last one.
+        best = ranked[remaining.pop()]
+        survivors.append(best.tolist())
+        best_pts = best[:-1]
+        remaining = [
+            idx for idx in remaining
+            if not boundary_iou(best_pts, ranked[idx][:-1]) > threshold
+        ]
+    return survivors
+def fill_hole(input_mask):
+    """Fill the enclosed holes of a mask.
+    The mask is padded with a one-pixel zero border and flood-filled from
+    the padded corner; every pixel the fill did not reach is then OR-ed
+    into the input mask.
+    Args:
+        input_mask (ndarray): 2-D mask; expected to be boolean so that the
+            final ``~`` / ``|`` act as logical operators.
+    Returns:
+        ndarray: The input mask with the unreached pixels set as well.
+    """
+    h, w = input_mask.shape
+    canvas = np.zeros((h + 2, w + 2), np.uint8)
+    canvas[1:h + 1, 1:w + 1] = input_mask.copy()
+    # cv2.floodFill needs a mask two pixels larger than the image.
+    mask = np.zeros((h + 4, w + 4), np.uint8)
+    cv2.floodFill(canvas, mask, (0, 0), 1)
+    # The ``np.bool`` alias was removed in NumPy 1.24; builtin ``bool`` is
+    # the same dtype and works on every NumPy version.
+    canvas = canvas[1:h + 1, 1:w + 1].astype(bool)
+    return ~canvas | input_mask
+def fourier2poly(fourier_coeff, num_reconstr_points=50):
+    """ Inverse Fourier transform
+        Args:
+            fourier_coeff (ndarray): Fourier coefficients shaped (n, 2k+1),
+                with n and k being candidates number and Fourier degree
+                respectively.
+            num_reconstr_points (int): Number of reconstructed polygon points.
+        Returns:
+            Polygons (ndarray): int32 array shaped
+                (n, 2 * num_reconstr_points); each row alternates the real
+                and imaginary part of every reconstructed point.
+        """
+    num_candidates = len(fourier_coeff)
+    degree = (len(fourier_coeff[0]) - 1) // 2
+    spectrum = np.zeros((num_candidates, num_reconstr_points), dtype='complex')
+    # Orders 0..k go to the front, orders -k..-1 to the tail (FFT layout).
+    spectrum[:, 0:degree + 1] = fourier_coeff[:, degree:]
+    spectrum[:, -degree:] = fourier_coeff[:, :degree]
+    contour = ifft(spectrum) * num_reconstr_points
+    points = np.stack((contour.real, contour.imag), axis=-1)
+    return points.astype('int32').reshape((num_candidates, -1))
+def fcenet_decode(preds,
+                  fourier_degree,
+                  num_reconstr_points,
+                  scale,
+                  alpha=1.0,
+                  beta=2.0,
+                  text_repr_type='poly',
+                  score_thr=0.3,
+                  nms_thr=0.1):
+    """Decoding predictions of FCENet to instances.
+    Args:
+        preds (list): ``[cls, reg]`` prediction arrays of one level. Only the
+            first sample along the leading axis of each array is decoded.
+        fourier_degree (int): The maximum Fourier transform degree k.
+        num_reconstr_points (int): The points number of the polygon
+            reconstructed from predicted Fourier coefficients.
+        scale (int): The down-sample scale of the prediction.
+        alpha (float) : The parameter to calculate final scores. Score_{final}
+                = (Score_{text region} ^ alpha)
+                * (Score_{text center region}^ beta)
+        beta (float) : The parameter to calculate final score.
+        text_repr_type (str):  Boundary encoding type 'poly' or 'quad'.
+        score_thr (float) : The threshold used to filter out the final
+            candidates.
+        nms_thr (float) :  The threshold of nms.
+    Returns:
+        boundaries (list[list[float]]): The instance boundary and confidence
+            list. With 'quad', every boundary is the 4-point minimum-area
+            rectangle of the polygon followed by its score.
+    """
+    assert isinstance(preds, list)
+    assert len(preds) == 2
+    assert text_repr_type in ['poly', 'quad']
+    cls_pred = preds[0][0]
+    # The head already applied softmax, so the channels are used directly.
+    tr_pred = cls_pred[0:2]
+    tcl_pred = cls_pred[2:]
+    reg_pred = preds[1][0].transpose([1, 2, 0])
+    x_pred = reg_pred[:, :, :2 * fourier_degree + 1]
+    y_pred = reg_pred[:, :, 2 * fourier_degree + 1:]
+    score_pred = (tr_pred[1]**alpha) * (tcl_pred[1]**beta)
+    tr_pred_mask = (score_pred) > score_thr
+    tr_mask = fill_hole(tr_pred_mask)
+    tr_contours, _ = cv2.findContours(
+        tr_mask.astype(np.uint8), cv2.RETR_TREE,
+        cv2.CHAIN_APPROX_SIMPLE)  # opencv4
+    mask = np.zeros_like(tr_mask)
+    boundaries = []
+    for cont in tr_contours:
+        deal_map = mask.copy().astype(np.int8)
+        cv2.drawContours(deal_map, [cont], -1, 1, -1)
+        score_map = score_pred * deal_map
+        score_mask = score_map > 0
+        xy_text = np.argwhere(score_mask)
+        # Pixel position as a complex number: column + row * 1j.
+        dxy = xy_text[:, 1] + xy_text[:, 0] * 1j
+        x, y = x_pred[score_mask], y_pred[score_mask]
+        c = x + y * 1j
+        # Shift the order-0 coefficient by the pixel position.
+        c[:, fourier_degree] = c[:, fourier_degree] + dxy
+        c *= scale
+        polygons = fourier2poly(c, num_reconstr_points)
+        score = score_map[score_mask].reshape(-1, 1)
+        polygons = poly_nms(np.hstack((polygons, score)).tolist(), nms_thr)
+        boundaries = boundaries + polygons
+    boundaries = poly_nms(boundaries, nms_thr)
+    if text_repr_type == 'quad':
+        new_boundaries = []
+        for boundary in boundaries:
+            poly = np.array(boundary[:-1]).reshape(-1, 2).astype(np.float32)
+            score = boundary[-1]
+            points = cv2.boxPoints(cv2.minAreaRect(poly))
+            # ``np.int0`` was removed in NumPy 2.0; it was an alias of intp.
+            points = points.astype(np.intp)
+            new_boundaries.append(points.reshape(-1).tolist() + [score])
+        # Hand back the converted quads; previously they were discarded.
+        boundaries = new_boundaries
+    return boundaries
+class FCEPostProcess(object):
+    """
+    The post process for FCENet.
+    Args:
+        scales (list[int]): Down-sample scale of each prediction level, in
+            the order the levels appear in the prediction dict.
+        fourier_degree (int): The maximum Fourier transform degree k.
+        num_reconstr_points (int): Number of points per reconstructed polygon.
+        decoding_type (str): Stored on the instance; not read by this class.
+        score_thr (float): Score threshold forwarded to ``fcenet_decode``.
+        nms_thr (float): NMS threshold, used per level and across levels.
+        alpha (float): Score exponent forwarded to ``fcenet_decode``.
+        beta (float): Score exponent forwarded to ``fcenet_decode``.
+        text_repr_type (str): Boundary encoding type 'poly' or 'quad'.
+    """
+    def __init__(self,
+                 scales,
+                 fourier_degree=5,
+                 num_reconstr_points=50,
+                 decoding_type='fcenet',
+                 score_thr=0.3,
+                 nms_thr=0.1,
+                 alpha=1.0,
+                 beta=1.0,
+                 text_repr_type='poly',
+                 **kwargs):
+        self.scales = scales
+        self.fourier_degree = fourier_degree
+        self.num_reconstr_points = num_reconstr_points
+        self.decoding_type = decoding_type
+        self.score_thr = score_thr
+        self.nms_thr = nms_thr
+        self.alpha = alpha
+        self.beta = beta
+        self.text_repr_type = text_repr_type
+    def __call__(self, preds, shape_list):
+        """Split every level into cls/reg maps and decode the boundaries.
+        Args:
+            preds (dict): One entry per level; channels 0:4 of each value
+                are the classification maps, the rest the regression maps.
+            shape_list (ndarray): Per-image shape information, forwarded to
+                ``get_boundary``.
+        Returns:
+            list[dict]: See ``get_boundary``.
+        """
+        score_maps = []
+        for value in preds.values():
+            if isinstance(value, paddle.Tensor):
+                value = value.numpy()
+            cls_res = value[:, :4, :, :]
+            reg_res = value[:, 4:, :, :]
+            score_maps.append([cls_res, reg_res])
+        return self.get_boundary(score_maps, shape_list)
+    def resize_boundary(self, boundaries, scale_factor):
+        """Rescale boundaries via scale_factor.
+        Args:
+            boundaries (list[list[float]]): The boundary list. Each boundary
+                has 2k+1 values with k>=4: k coordinate pairs and a score.
+            scale_factor (sequence[float]): Its first two entries multiply
+                the first and second value of every coordinate pair.
+        Returns:
+            tuple: ``(boxes, scores)`` where ``boxes`` is a float32 ndarray
+                built from one (k, 2) point array per boundary and
+                ``scores`` is the list of boundary scores.
+        """
+        pair_scale = np.array(scale_factor[:2])
+        boxes = []
+        scores = []
+        for b in boundaries:
+            # Enforce the layout check (same assert style as boundary_iou)
+            # instead of calling it and ignoring the answer.
+            assert valid_boundary(b, True)
+            scores.append(b[-1])
+            points = np.array(b[:-1]).reshape([-1, 2])
+            boxes.append(points * pair_scale)
+        return np.array(boxes, dtype=np.float32), scores
+    def get_boundary(self, score_maps, shape_list):
+        """Decode all levels, merge them with NMS and rescale the result.
+        Args:
+            score_maps (list): One ``[cls, reg]`` pair per level.
+            shape_list (ndarray): Per-image shape information.
+        Returns:
+            list[dict]: A single dict with ``points`` and ``scores``.
+        """
+        assert len(score_maps) == len(self.scales)
+        boundaries = []
+        for score_map, scale in zip(score_maps, self.scales):
+            boundaries = boundaries + self._get_boundary_single(score_map,
+                                                                scale)
+        # nms
+        boundaries = poly_nms(boundaries, self.nms_thr)
+        # NOTE(review): only row 0 of shape_list is read, so a single image
+        # is handled; columns 2: are presumably the (h, w) resize ratios,
+        # inverted and reversed to give per-coordinate factors - confirm.
+        boundaries, scores = self.resize_boundary(
+            boundaries, (1 / shape_list[0, 2:]).tolist()[::-1])
+        boxes_batch = [dict(points=boundaries, scores=scores)]
+        return boxes_batch
+    def _get_boundary_single(self, score_map, scale):
+        """Decode the boundaries of one level with ``fcenet_decode``."""
+        assert len(score_map) == 2
+        assert score_map[1].shape[1] == 4 * self.fourier_degree + 2
+        return fcenet_decode(
+            preds=score_map,
+            fourier_degree=self.fourier_degree,
+            num_reconstr_points=self.num_reconstr_points,
+            scale=scale,
+            alpha=self.alpha,
+            beta=self.beta,
+            text_repr_type=self.text_repr_type,
+            score_thr=self.score_thr,
+            nms_thr=self.nms_thr)
--- a/train.sh
+++ b/train.sh
 # recommended paddle.__version__ == 2.0.0
-python3 -m paddle.distributed.launch --log_dir=./debug/ --gpus '0,1,2,3,4,5,6,7'  tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml
+# python3 -m paddle.distributed.launch --log_dir=./debug/ --gpus '0,1,2,3,4,5,6,7'  tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml
+python -m paddle.distributed.launch --gpus '7'  tools/train.py -c configs/det/det_r50_fce_ctw.yml