dygraph first commit

aad3093a · WenmuZhou · 10f7e519 · 10f7e519 · aad3093a · 10f7e519
Commit aad3093a authored Oct 13, 2020 by WenmuZhou
20 changed files
--- a/configs/rec/rec_mv3_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_ctc.yml
-Global:
-  algorithm: STARNet
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_STARNet
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: ctc
-  tps: true
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-
-  
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-TPS:
-  function: ppocr.modeling.stns.tps,TPS
-  num_fiducial: 20
-  loc_lr: 0.1
-  model_name: small
-  
-Backbone:
-  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
-  scale: 0.5
-  model_name: large
- 
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 96
-    
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
 Global:
-  algorithm: CRNN
-  use_gpu: true
-  epoch_num: 72
+  use_gpu: false
+  epoch_num: 500
  log_smooth_window: 20
  print_batch_step: 10
-  save_model_dir: output/rec_CRNN
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: ctc
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
+  save_model_dir: ./output/rec/test/
+  save_epoch_step: 500
+  # evaluation is run every eval_batch_step iterations
+  eval_batch_step: 127
+  # if pretrained_model was saved in static mode, load_static_weights must be set to True
+  load_static_weights: True
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints: #output/rec/rec_crnn/best_accuracy
  save_inference_dir:
-  infer_img:
-
+  use_visualdl: False
+  infer_img: doc/imgs_words/ch/word_1.jpg
+  # for data or label process
+  max_text_length: 80
+  character_dict_path: ppocr/utils/ppocr_keys_v1.txt
+  character_type: 'ch'
+  use_space_char: False
+  infer_mode: False
+  use_tps: False

-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
-  layers: 34
- 
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 256
-    
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss

 Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
+  name: Adam
  beta1: 0.9
  beta2: 0.999
+  learning_rate:
+    name: Cosine
+    lr: 0.001
+    warmup_epoch: 4
+  regularizer:
+    name: 'L2'
+    factor: 0.00001
+
+Architecture:
+  type: rec
+  algorithm: CRNN
+  Transform:
+  Backbone:
+    name: ResNet
+    # ResNet34, matching the "r34" in this config's file name
+    layers: 34
+  Neck:
+    name: SequenceEncoder
+    # NOTE(review): the file name says "bilstm" but the encoder is set to fc;
+    # confirm which encoder is intended
+    encoder_type: fc
+    hidden_size: 96
+  Head:
+    name: CTC
+    fc_decay: 0.00001
+
+Loss:
+  name: CTCLoss
+
+PostProcess:
+  name: CTCLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+
+TRAIN:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /home/zhoujun20/rec
+    file_list:
+      - /home/zhoujun20/rec/real_data.txt # dataset1
+    # one sampling ratio per file_list entry; the ratios must sum to 1
+    ratio_list: [ 1.0 ]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecAug:
+      - RecResizeImg:
+          image_shape: [ 3,32,320 ]
+      - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns a list in this order
+  loader:
+    batch_size: 256
+    shuffle: True
+    drop_last: True
+    num_workers: 6
+
+EVAL:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /home/zhoujun20/rec
+    file_list:
+      - /home/zhoujun20/rec/label_val_all.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [ 3,32,320 ]
+      - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns a list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size: 256
+    num_workers: 6
--- a/configs/rec/rec_r34_vd_none_none_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_none_ctc.yml
-Global:
-  algorithm: Rosetta
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_Rosetta
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: ctc
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-  
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
-  layers: 34
-
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: reshape
-  
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_r34_vd_tps_bilstm_attn.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_attn.yml
-Global:
-  algorithm: RARE
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_RARE
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: attention
-  tps: true
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-
-
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-TPS:
-  function: ppocr.modeling.stns.tps,TPS
-  num_fiducial: 20
-  loc_lr: 0.1
-  model_name: large
-
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
-  layers: 34
- 
-Head:
-  function: ppocr.modeling.heads.rec_attention_head,AttentionPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 256
-  Attention:
-    decoder_size: 128
-    word_vector_dim: 128
-  
-Loss:
-  function: ppocr.modeling.losses.rec_attention_loss,AttentionLoss
-  
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
-Global:
-  algorithm: STARNet
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_STARNet
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: ctc
-  tps: true
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-
-
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-TPS:
-  function: ppocr.modeling.stns.tps,TPS
-  num_fiducial: 20
-  loc_lr: 0.1
-  model_name: large
-
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
-  layers: 34
- 
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 256
-    
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_r50fpn_vd_none_srn.yml
+++ b/configs/rec/rec_r50fpn_vd_none_srn.yml
-Global:
-  algorithm: SRN
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_pvam_withrotate
-  save_epoch_step: 1
-  eval_batch_step: 8000
-  train_batch_size_per_card: 64
-  test_batch_size_per_card: 1
-  image_shape: [1, 64, 256]
-  max_text_length: 25
-  character_type: en
-  loss_type: srn
-  num_heads: 8
-  average_window: 0.15
-  max_average_window: 15625
-  min_average_window: 10000
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights: 
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_fpn,ResNet
-  layers: 50
- 
-Head:
-  function: ppocr.modeling.heads.rec_srn_all_head,SRNPredict
-  encoder_type: rnn
-  num_encoder_TUs: 2
-  num_decoder_TUs: 4
-  hidden_dims: 512
-  SeqRNN:
-    hidden_size: 256
-    
-Loss:
-  function: ppocr.modeling.losses.rec_srn_loss,SRNLoss
-
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.0001
-  beta1: 0.9
-  beta2: 0.999
--- a/ppocr/data/__init__.py
+++ b/ppocr/data/__init__.py
@@ -11,3 +11,114 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import os
+import sys
+import numpy as np
+import paddle
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
+
+import copy
+from paddle.io import DataLoader, DistributedBatchSampler, BatchSampler
+import paddle.distributed as dist
+
+from ppocr.data.imaug import transform, create_operators
+
+__all__ = ['build_dataloader', 'transform', 'create_operators']
+
+
+def build_dataset(config, global_config):
+    """Instantiate the dataset class named by ``config['name']``.
+
+    Args:
+        config (dict): dataset configuration. It must contain a ``name`` key;
+            the remaining keys are forwarded to the dataset constructor.
+            The caller's dict is left unmodified.
+        global_config (dict): forwarded unchanged to the dataset constructor.
+
+    Returns:
+        The constructed ``SimpleDataSet`` or ``LMDBDateSet`` instance.
+
+    Raises:
+        KeyError: if ``config`` has no ``name`` key.
+        AssertionError: if ``name`` is not one of the supported datasets.
+    """
+    from ppocr.data.dataset import SimpleDataSet, LMDBDateSet
+    support_dict = {
+        'SimpleDataSet': SimpleDataSet,
+        'LMDBDateSet': LMDBDateSet,
+    }
+
+    # Work on a private copy: popping 'name' from the caller's dict made a
+    # second call with the same config fail with KeyError.
+    config = copy.deepcopy(config)
+    module_name = config.pop('name')
+    assert module_name in support_dict, \
+        'DataSet only support {}'.format(list(support_dict))
+
+    # Explicit lookup instead of eval() on a string taken from the config.
+    return support_dict[module_name](config, global_config)
+
+
+def build_dataloader(config, device, distributed=False, global_config=None):
+    """Build one dataset per label file and wrap them in a balanced loader.
+
+    Args:
+        config (dict): must contain a ``dataset`` section (with ``file_list``
+            and, when more than one file is listed, ``ratio_list``) and a
+            ``loader`` section. The caller's dict is not modified.
+        device: passed through to ``BatchBalancedDataLoader``.
+        distributed (bool): passed through to ``BatchBalancedDataLoader``.
+        global_config (dict): passed through to every dataset constructor.
+
+    Returns:
+        tuple: ``(data_loader, info_dict)`` where ``info_dict`` is taken from
+        the dataset built for the last entry of ``file_list``.
+
+    Raises:
+        ValueError: if ``file_list`` is empty.
+    """
+    from ppocr.data.dataset import BatchBalancedDataLoader
+
+    config = copy.deepcopy(config)
+    dataset_config = config['dataset']
+
+    file_list = dataset_config.pop('file_list')
+    # A bare string would otherwise be iterated character by character.
+    if isinstance(file_list, str):
+        file_list = [file_list]
+    if len(file_list) == 0:
+        raise ValueError('file_list must contain at least one label file')
+    if len(file_list) == 1:
+        ratio_list = [1.0]
+    else:
+        ratio_list = dataset_config.pop('ratio_list')
+
+    _dataset_list = []
+    for file in file_list:
+        # build_dataset pops keys from the dict it receives, so every dataset
+        # gets its own copy of the shared settings.
+        per_file_config = copy.deepcopy(dataset_config)
+        per_file_config['file_list'] = file
+        _dataset_list.append(build_dataset(per_file_config, global_config))
+    data_loader = BatchBalancedDataLoader(_dataset_list, ratio_list,
+                                          distributed, device, config['loader'])
+    return data_loader, _dataset_list[-1].info_dict
+
+
+def test_loader():
+    """Manual smoke test: iterate the TRAIN loader once and print timings.
+
+    Reads the config path from the command line via ``ArgsParser``, builds the
+    TRAIN data loader on CPU and prints, for every batch, its index, the
+    length of its first item and the time elapsed since the previous batch.
+    """
+    import time
+    from tools.program import load_config, ArgsParser
+
+    FLAGS = ArgsParser().parse_args()
+    config = load_config(FLAGS.config)
+
+    place = paddle.CPUPlace()
+    paddle.disable_static(place)
+
+    data_loader, _ = build_dataloader(
+        config['TRAIN'], place, global_config=config['Global'])
+    start = time.time()
+    num_batches = len(data_loader)
+    print(num_batches)
+    for epoch in range(1):
+        print('epoch {} ****************'.format(epoch))
+        for i, batch in enumerate(data_loader):
+            # Stop explicitly after one pass (i runs over 0..num_batches-1)
+            # instead of relying on the loader to end the iteration.
+            if i >= num_batches:
+                break
+            t = time.time() - start
+            start = time.time()
+            print('{}, batch : {} ,time {}'.format(i, len(batch[0]), t))
+
+
+if __name__ == '__main__':
+    test_loader()
--- a/ppocr/data/dataset.py
+++ b/ppocr/data/dataset.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import copy
+import numpy as np
+import os
+import lmdb
+import random
+import signal
+import paddle
+from paddle.io import Dataset, DataLoader, DistributedBatchSampler, BatchSampler
+
+from .imaug import transform, create_operators
+from ppocr.utils.logging import get_logger
+
+
+def term_mp(sig_num, frame):
+    """Signal handler: send SIGKILL to the process group of this process.
+
+    Args:
+        sig_num: signal number supplied by the signal machinery (unused).
+        frame: current stack frame supplied by the signal machinery (unused).
+    """
+    main_pid = os.getpid()
+    group_id = os.getpgid(main_pid)
+    print("main proc {} exit, kill process group {}".format(main_pid,
+                                                            group_id))
+    os.killpg(group_id, signal.SIGKILL)
+
+
+# Registered as a side effect of importing this module.
+signal.signal(signal.SIGINT, term_mp)
+signal.signal(signal.SIGTERM, term_mp)
+
+
+class ModeException(Exception):
+    """Error for an unsupported mode; the message lists the supported modes."""
+
+    def __init__(self, message='', mode=''):
+        supported = ("\nOnly the following 3 modes are supported: "
+                     "train, valid, test. Given mode is {}").format(mode)
+        super(ModeException, self).__init__(message + supported)
+
+
+class SampleNumException(Exception):
+    """Error raised when the data holds fewer samples than one batch.
+
+    The message embeds the given sample count and batch size.
+    """
+
+    def __init__(self, message='', sample_num=0, batch_size=1):
+        detail = ("\nError: The number of the whole data ({}) "
+                  "is smaller than the batch_size ({}), and drop_last "
+                  "is turnning on, so nothing  will feed in program, "
+                  "Terminated now. Please reset batch_size to a smaller "
+                  "number or feed more data!").format(sample_num, batch_size)
+        super(SampleNumException, self).__init__(message + detail)
+
+
+def get_file_list(file_list, data_dir, delimiter='\t'):
+    """
+    Read ``<image path><delimiter><label>`` records from label files.
+
+    Each line is stripped of surrounding whitespace and split on
+    ``delimiter``. Lines that do not yield exactly two fields are reported
+    with a warning and skipped.
+
+    Args:
+        file_list(str|list): one label file path or a list of such paths.
+        data_dir(str): directory joined in front of every image path.
+        delimiter(str): separator between the image path and the label.
+
+    Returns:
+        list[dict]: one ``{'img_path': ..., 'label': ...}`` entry per valid
+        line, in file order (no shuffling happens here).
+    """
+    if isinstance(file_list, str):
+        file_list = [file_list]
+    logger = None
+    data_source_list = []
+    for file in file_list:
+        with open(file) as f:
+            for raw_line in f:
+                line = raw_line.strip()
+                fields = line.split(delimiter)
+                if len(fields) != 2:
+                    if logger is None:
+                        logger = get_logger()
+                    logger.warning('label error in {}'.format(line))
+                    # Skip the record: falling through would reuse the
+                    # previous line's values or hit unbound names.
+                    continue
+                img_path, label = fields
+                data_source_list.append({
+                    'img_path': os.path.join(data_dir, img_path),
+                    'label': label
+                })
+    return data_source_list
+
+
+class LMDBDateSet(Dataset):
+    """Dataset that reads image/label pairs from an LMDB environment.
+
+    The environment is read through the keys ``num-samples``,
+    ``label-%09d`` and ``image-%09d``, with indices starting at 1.
+    """
+
+    def __init__(self, config, global_config):
+        """
+        Args:
+            config(dict): uses ``file_list`` (passed to ``lmdb.open``) and
+                ``transforms`` (passed to ``create_operators``).
+            global_config(dict): uses ``max_text_length``; also passed to
+                ``create_operators``.
+        """
+        super(LMDBDateSet, self).__init__()
+        self.data_list = self.load_lmdb_dataset(
+            config['file_list'], global_config['max_text_length'])
+        random.shuffle(self.data_list)
+
+        self.ops = create_operators(config['transforms'], global_config)
+
+        # for rec: take 'character' from the last operator that defines it
+        character = ''
+        for op in self.ops:
+            if hasattr(op, 'character'):
+                character = getattr(op, 'character')
+
+        self.info_dict = {'character': character}
+
+    def load_lmdb_dataset(self, data_dir, max_text_length):
+        """Open the LMDB environment and collect the usable sample indices.
+
+        Args:
+            data_dir(str): path handed to ``lmdb.open``.
+            max_text_length(int): samples whose decoded label is longer than
+                this are left out.
+
+        Returns:
+            list[int]: 1-based indices of the samples that have a label no
+            longer than ``max_text_length``.
+
+        Raises:
+            SystemExit: with status 1 if the opened environment is falsy.
+        """
+        self.env = lmdb.open(
+            data_dir,
+            max_readers=32,
+            readonly=True,
+            lock=False,
+            readahead=False,
+            meminit=False)
+        if not self.env:
+            print('cannot create lmdb from %s' % (data_dir))
+            # This is a failure, so do not report exit status 0.
+            raise SystemExit(1)
+
+        filtered_index_list = []
+        with self.env.begin(write=False) as txn:
+            nSamples = int(txn.get('num-samples'.encode()))
+            self.nSamples = nSamples
+            for index in range(self.nSamples):
+                index += 1  # lmdb starts with 1
+                label_key = 'label-%09d'.encode() % index
+                label = txn.get(label_key)
+                if label is None:
+                    # A missing label key must not abort construction;
+                    # __getitem__ treats a missing label as a bad sample too.
+                    continue
+                if len(label.decode('utf-8')) > max_text_length:
+                    # label longer than max_text_length: leave the sample out
+                    continue
+                filtered_index_list.append(index)
+        return filtered_index_list
+
+    def print_lmdb_sets_info(self, lmdb_sets):
+        """Log one ``dirpath:num_samples`` summary entry per given set.
+
+        Args:
+            lmdb_sets(list[dict]): each dict provides ``dirpath`` and
+                ``num_samples``.
+        """
+        lmdb_info_strs = []
+        for dataset_idx in range(len(lmdb_sets)):
+            tmp_str = " %s:%d," % (lmdb_sets[dataset_idx]['dirpath'],
+                                   lmdb_sets[dataset_idx]['num_samples'])
+            lmdb_info_strs.append(tmp_str)
+        lmdb_info_strs = ''.join(lmdb_info_strs)
+        logger = get_logger()
+        logger.info("DataSummary:" + lmdb_info_strs)
+        return
+
+    def __getitem__(self, idx):
+        """Return the transformed sample at position ``idx``.
+
+        If the label is missing or ``transform`` returns None, a randomly
+        chosen other sample is returned instead.
+        """
+        idx = self.data_list[idx]
+        with self.env.begin(write=False) as txn:
+            label_key = 'label-%09d'.encode() % idx
+            label = txn.get(label_key)
+            if label is not None:
+                label = label.decode('utf-8')
+                img_key = 'image-%09d'.encode() % idx
+                imgbuf = txn.get(img_key)
+                data = {'image': imgbuf, 'label': label}
+                outs = transform(data, self.ops)
+            else:
+                outs = None
+            if outs is None:
+                return self.__getitem__(np.random.randint(self.__len__()))
+            return outs
+
+    def __len__(self):
+        """Number of samples that passed the label-length filter."""
+        return len(self.data_list)
+
+
+class SimpleDataSet(Dataset):
+    """Dataset backed by label files listing image paths and labels."""
+
+    def __init__(self, config, global_config):
+        """
+        Args:
+            config(dict): uses ``file_list``, ``data_dir``, ``transforms``
+                and the optional ``delimiter`` (tab when absent).
+            global_config(dict): passed on to ``create_operators``.
+        """
+        super(SimpleDataSet, self).__init__()
+        self.data_list = get_file_list(config['file_list'],
+                                       config['data_dir'],
+                                       config.get('delimiter', '\t'))
+        random.shuffle(self.data_list)
+
+        self.ops = create_operators(config['transforms'], global_config)
+
+        # for rec: the last operator exposing 'character' wins
+        character = ''
+        for op in self.ops:
+            character = getattr(op, 'character', character)
+
+        self.info_dict = {'character': character}
+
+    def __getitem__(self, idx):
+        """Read the image bytes for entry ``idx`` and run the transforms.
+
+        When ``transform`` returns None, a randomly chosen other sample is
+        returned instead.
+        """
+        sample = copy.deepcopy(self.data_list[idx])
+        with open(sample['img_path'], 'rb') as f:
+            sample['image'] = f.read()
+        outs = transform(sample, self.ops)
+        if outs is not None:
+            return outs
+        return self.__getitem__(np.random.randint(self.__len__()))
+
+    def __len__(self):
+        """Number of entries read from the label files."""
+        return len(self.data_list)
+
+
+class BatchBalancedDataLoader(object):
+    """Iterator producing batches that mix several datasets by fixed ratios.
+
+    One ``DataLoader`` is created per dataset; every call to ``__next__``
+    draws one sub-batch from each of them and concatenates the sub-batches
+    item by item along axis 0. A sub-loader that runs out is restarted.
+    """
+
+    def __init__(self,
+                 dataset_list: list,
+                 ratio_list: list,
+                 distributed,
+                 device,
+                 loader_args: dict):
+        """
+        Combine the datasets in dataset_list according to the matching ratios
+        in ratio_list, so that every batch is sampled in those proportions.
+        :param dataset_list: list of datasets
+        :param ratio_list: list of ratios, one per dataset, summing to 1
+        :param distributed: use DistributedBatchSampler instead of BatchSampler
+        :param device: passed to DataLoader as ``places``
+        :param loader_args: DataLoader settings; ``batch_size`` is popped
+            from it, ``shuffle``, ``drop_last`` and ``num_workers`` are read
+        """
+        # Compare with a tolerance: sums such as 0.1 + 0.2 + 0.7 are not
+        # exactly 1 in floating point.
+        assert abs(sum(ratio_list) - 1) < 1e-6 and \
+            len(dataset_list) == len(ratio_list)
+
+        self.dataset_len = 0
+        self.data_loader_list = []
+        self.dataloader_iter_list = []
+        all_batch_size = loader_args.pop('batch_size')
+        batch_size_list = list(
+            map(int, [max(1.0, all_batch_size * x) for x in ratio_list]))
+        # Give the rounding remainder to the dataset with the largest ratio.
+        remain_num = all_batch_size - sum(batch_size_list)
+        batch_size_list[np.argmax(ratio_list)] += remain_num
+
+        for _dataset, _batch_size in zip(dataset_list, batch_size_list):
+            if distributed:
+                batch_sampler_class = DistributedBatchSampler
+            else:
+                batch_sampler_class = BatchSampler
+            batch_sampler = batch_sampler_class(
+                dataset=_dataset,
+                batch_size=_batch_size,
+                shuffle=loader_args['shuffle'],
+                drop_last=loader_args['drop_last'], )
+            _data_loader = DataLoader(
+                dataset=_dataset,
+                batch_sampler=batch_sampler,
+                places=device,
+                num_workers=loader_args['num_workers'],
+                return_list=True, )
+            self.data_loader_list.append(_data_loader)
+            self.dataloader_iter_list.append(iter(_data_loader))
+            self.dataset_len += len(_dataset)
+
+    def __iter__(self):
+        return self
+
+    def __len__(self):
+        """Length of the shortest underlying DataLoader."""
+        return min([len(x) for x in self.data_loader_list])
+
+    def __next__(self):
+        """Return the next mixed batch as a list of concatenated items.
+
+        Raises:
+            StopIteration: if no sub-loader produced a batch.
+        """
+        batch = []
+        for i, data_loader_iter in enumerate(self.dataloader_iter_list):
+            try:
+                _batch_i = next(data_loader_iter)
+                batch.append(_batch_i)
+            except StopIteration:
+                # This sub-loader is exhausted: restart it and draw again.
+                self.dataloader_iter_list[i] = iter(self.data_loader_list[i])
+                _batch_i = next(self.dataloader_iter_list[i])
+                batch.append(_batch_i)
+            except ValueError:
+                # NOTE(review): the sub-batch is dropped silently here;
+                # confirm which call is expected to raise ValueError.
+                pass
+        if len(batch) == 0:
+            # Nothing was collected; indexing batch[0] would raise IndexError.
+            raise StopIteration
+        batch_list = []
+        batch_item_size = len(batch[0])
+        for i in range(batch_item_size):
+            cur_item_list = [batch_i[i] for batch_i in batch]
+            batch_list.append(paddle.concat(cur_item_list, axis=0))
+        return batch_list
+
+
+def fill_batch(batch):
+    """
+    2020.09.08: The current paddle version only supports returning data with the same length.
+                Therefore, fill in the batches with inconsistent lengths.
+                this method is currently only useful for text detection
+
+    Every field of every sample is zero-padded along axis 0 up to the longest
+    length of that field in the batch, and the list of the original field
+    lengths is appended to each sample. Samples are modified in place.
+
+    Args:
+        batch(list): list of samples; each sample is a list of np.ndarray
+            fields, and all samples have the same number of fields.
+
+    Returns:
+        list: the same ``batch`` object after padding.
+    """
+    keys = list(range(len(batch[0])))
+    v_max_len_dict = {k: max(len(item[k]) for item in batch) for k in keys}
+    for item in batch:
+        length = []
+        for k in keys:
+            v = item[k]
+            length.append(len(v))
+            assert isinstance(v, np.ndarray)
+            pad_len = v_max_len_dict[k] - len(v)
+            if pad_len == 0:
+                continue
+            # Take the element shape and dtype from the array itself, not
+            # from v[0]: an empty field has no first element.
+            tmp_array = np.zeros((pad_len, ) + v.shape[1:], dtype=v.dtype)
+            item[k] = np.concatenate([v, tmp_array])
+        item.append(length)
+    return batch
--- a/ppocr/data/det/data_augment.py
+++ b/ppocr/data/det/data_augment.py
-# -*- coding:utf-8 -*- 
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import numpy as np
-import random
-import cv2
-import math
-
-import imgaug
-import imgaug.augmenters as iaa
-
-
-def AugmentData(data):
-    img = data['image']
-    shape = img.shape
-
-    aug = iaa.Sequential(
-        [iaa.Fliplr(0.5), iaa.Affine(rotate=(-10, 10)), iaa.Resize(
-            (0.5, 3))]).to_deterministic()
-
-    def may_augment_annotation(aug, data, shape):
-        if aug is None:
-            return data
-
-        line_polys = []
-        for poly in data['polys']:
-            new_poly = may_augment_poly(aug, shape, poly)
-            line_polys.append(new_poly)
-        data['polys'] = np.array(line_polys)
-        return data
-
-    def may_augment_poly(aug, img_shape, poly):
-        keypoints = [imgaug.Keypoint(p[0], p[1]) for p in poly]
-        keypoints = aug.augment_keypoints(
-            [imgaug.KeypointsOnImage(
-                keypoints, shape=img_shape)])[0].keypoints
-        poly = [(p.x, p.y) for p in keypoints]
-        return poly
-
-    img_aug = aug.augment_image(img)
-    data['image'] = img_aug
-    data = may_augment_annotation(aug, data, shape)
-    return data
--- a/ppocr/data/det/dataset_traversal.py
+++ b/ppocr/data/det/dataset_traversal.py
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import os
-import sys
-import math
-import random
-import functools
-import numpy as np
-import cv2
-import string
-from ppocr.utils.utility import initial_logger
-logger = initial_logger()
-from ppocr.utils.utility import create_module
-from ppocr.utils.utility import get_image_file_list
-import time
-
-
-class TrainReader(object):
-    def __init__(self, params):
-        self.num_workers = params['num_workers']
-        self.label_file_path = params['label_file_path']
-        print(self.label_file_path)
-        self.use_mul_data = False
-        if isinstance(self.label_file_path, list):
-            self.use_mul_data = True
-            self.data_ratio_list = params['data_ratio_list']
-        self.batch_size = params['train_batch_size_per_card']
-        assert 'process_function' in params,\
-            "absence process_function in Reader"
-        self.process = create_module(params['process_function'])(params)
-
-    def __call__(self, process_id):     
-        def sample_iter_reader():
-            with open(self.label_file_path, "rb") as fin:
-                label_infor_list = fin.readlines()
-            img_num = len(label_infor_list)
-            img_id_list = list(range(img_num))
-            random.shuffle(img_id_list)
-            if sys.platform == "win32" and self.num_workers != 1:
-                print("multiprocess is not fully compatible with Windows."
-                      "num_workers will be 1.")
-                self.num_workers = 1
-            for img_id in range(process_id, img_num, self.num_workers):
-                label_infor = label_infor_list[img_id_list[img_id]]
-                outs = self.process(label_infor)
-                if outs is None:
-                    continue
-                yield outs
-
-        def sample_iter_reader_mul():
-            batch_size = 1000
-            data_source_list = self.label_file_path
-            batch_size_list = list(map(int, [max(1.0, batch_size * x) for x in self.data_ratio_list]))
-            print(self.data_ratio_list, batch_size_list)
-
-            data_filename_list, data_size_list, fetch_record_list = [], [], []
-            for data_source in data_source_list:
-                image_files = open(data_source, "rb").readlines()
-                random.shuffle(image_files)
-                data_filename_list.append(image_files)
-                data_size_list.append(len(image_files))
-                fetch_record_list.append(0)
-
-            image_batch = []
-            # get a batch of img_fns and poly_fns
-            for i in range(0, len(batch_size_list)):
-                bs = batch_size_list[i]
-                ds = data_size_list[i]
-                image_names = data_filename_list[i]
-                fetch_record = fetch_record_list[i]
-                data_path = data_source_list[i]
-                for j in range(fetch_record, fetch_record + bs):
-                    index = j % ds
-                    image_batch.append(image_names[index])
-
-                if (fetch_record + bs) > ds:
-                    fetch_record_list[i] = 0
-                    random.shuffle(data_filename_list[i])
-                else:
-                    fetch_record_list[i] = fetch_record + bs
-
-            if sys.platform == "win32":
-                print("multiprocess is not fully compatible with Windows."
-                      "num_workers will be 1.")
-                self.num_workers = 1
-
-            for label_infor in image_batch:
-                outs = self.process(label_infor)
-                if outs is None:
-                    continue
-                yield outs
-
-        def batch_iter_reader():
-            batch_outs = []
-            if self.use_mul_data:
-                print("Sample date from multiple datasets!")
-                for outs in sample_iter_reader_mul():
-                    batch_outs.append(outs)
-                    if len(batch_outs) == self.batch_size:
-                        yield batch_outs
-                        batch_outs = []                
-            else:
-                for outs in sample_iter_reader():
-                    batch_outs.append(outs)
-                    if len(batch_outs) == self.batch_size:
-                        yield batch_outs
-                        batch_outs = []
-
-        return batch_iter_reader
-
-
-class EvalTestReader(object):
-    def __init__(self, params):
-        self.params = params
-        assert 'process_function' in params,\
-            "absence process_function in EvalTestReader"
-
-    def __call__(self, mode):
-        process_function = create_module(self.params['process_function'])(
-            self.params)
-        batch_size = self.params['test_batch_size_per_card']
-
-        img_list = []
-        if mode != "test":
-            img_set_dir = self.params['img_set_dir']
-            img_name_list_path = self.params['label_file_path']
-            with open(img_name_list_path, "rb") as fin:
-                lines = fin.readlines()
-                for line in lines:
-                    img_name = line.decode().strip("\n").split("\t")[0]
-                    img_path = os.path.join(img_set_dir, img_name)
-                    img_list.append(img_path)
-        else:
-            img_path = self.params['infer_img']
-            img_list = get_image_file_list(img_path)
-
-        def batch_iter_reader():
-            batch_outs = []
-            for img_path in img_list:
-                img = cv2.imread(img_path)
-                if img is None:
-                    logger.info("{} does not exist!".format(img_path))
-                    continue
-                elif len(list(img.shape)) == 2 or img.shape[2] == 1:
-                    img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
-                outs = process_function(img)
-                outs.append(img_path)
-                batch_outs.append(outs)
-                if len(batch_outs) == batch_size:
-                    yield batch_outs
-                    batch_outs = []
-            if len(batch_outs) != 0:
-                yield batch_outs
-
-        return batch_iter_reader
--- a/ppocr/data/det/db_process.py
+++ b/ppocr/data/det/db_process.py
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import math
-import cv2
-import numpy as np
-import json
-import sys
-from ppocr.utils.utility import initial_logger, check_and_read_gif
-logger = initial_logger()
-
-from .data_augment import AugmentData
-from .random_crop_data import RandomCropData
-from .make_shrink_map import MakeShrinkMap
-from .make_border_map import MakeBorderMap
-
-
-class DBProcessTrain(object):
-    """
-    DB pre-process for Train mode
-    """
-
-    def __init__(self, params):
-        self.img_set_dir = params['img_set_dir']
-        self.image_shape = params['image_shape']
-
-    def order_points_clockwise(self, pts):
-        rect = np.zeros((4, 2), dtype="float32")
-        s = pts.sum(axis=1)
-        rect[0] = pts[np.argmin(s)]
-        rect[2] = pts[np.argmax(s)]
-        diff = np.diff(pts, axis=1)
-        rect[1] = pts[np.argmin(diff)]
-        rect[3] = pts[np.argmax(diff)]
-        return rect
-
-    def make_data_dict(self, imgvalue, entry):
-        boxes = []
-        texts = []
-        ignores = []
-        for rect in entry:
-            points = rect['points']
-            transcription = rect['transcription']
-            try:
-                box = self.order_points_clockwise(
-                    np.array(points).reshape(-1, 2))
-                if cv2.contourArea(box) > 0:
-                    boxes.append(box)
-                    texts.append(transcription)
-                    ignores.append(transcription in ['*', '###'])
-            except:
-                print('load label failed!')
-        data = {
-            'image': imgvalue,
-            'shape': [imgvalue.shape[0], imgvalue.shape[1]],
-            'polys': np.array(boxes),
-            'texts': texts,
-            'ignore_tags': ignores,
-        }
-        return data
-
-    def NormalizeImage(self, data):
-        im = data['image']
-        img_mean = [0.485, 0.456, 0.406]
-        img_std = [0.229, 0.224, 0.225]
-        im = im.astype(np.float32, copy=False)
-        im = im / 255
-        im -= img_mean
-        im /= img_std
-        channel_swap = (2, 0, 1)
-        im = im.transpose(channel_swap)
-        data['image'] = im
-        return data
-
-    def FilterKeys(self, data):
-        filter_keys = ['polys', 'texts', 'ignore_tags', 'shape']
-        for key in filter_keys:
-            if key in data:
-                del data[key]
-        return data
-
-    def convert_label_infor(self, label_infor):
-        label_infor = label_infor.decode()
-        label_infor = label_infor.encode('utf-8').decode('utf-8-sig')
-        substr = label_infor.strip("\n").split("\t")
-        img_path = self.img_set_dir + substr[0]
-        label = json.loads(substr[1])
-        return img_path, label
-
-    def __call__(self, label_infor):
-        img_path, gt_label = self.convert_label_infor(label_infor)
-        imgvalue, flag = check_and_read_gif(img_path)
-        if not flag:
-            imgvalue = cv2.imread(img_path)
-        if imgvalue is None:
-            logger.info("{} does not exist!".format(img_path))
-            return None
-        if len(list(imgvalue.shape)) == 2 or imgvalue.shape[2] == 1:
-            imgvalue = cv2.cvtColor(imgvalue, cv2.COLOR_GRAY2BGR)
-        data = self.make_data_dict(imgvalue, gt_label)
-        data = AugmentData(data)
-        data = RandomCropData(data, self.image_shape[1:])
-        data = MakeShrinkMap(data)
-        data = MakeBorderMap(data)
-        data = self.NormalizeImage(data)
-        data = self.FilterKeys(data)
-        return data['image'], data['shrink_map'], data['shrink_mask'], data[
-            'threshold_map'], data['threshold_mask']
-
-
-class DBProcessTest(object):
-    """
-    DB pre-process for Test mode
-    """
-
-    def __init__(self, params):
-        super(DBProcessTest, self).__init__()
-        self.resize_type = 0
-        if 'test_image_shape' in params:
-            self.image_shape = params['test_image_shape']
-            # print(self.image_shape)
-            self.resize_type = 1
-        if 'max_side_len' in params:
-            self.max_side_len = params['max_side_len']
-        else:
-            self.max_side_len = 2400
-
-    def resize_image_type0(self, im):
-        """
-        resize image to a size multiple of 32 which is required by the network
-        args:
-            img(array): array with shape [h, w, c]
-        return(tuple):
-            img, (ratio_h, ratio_w)
-        """
-        max_side_len = self.max_side_len
-        h, w, _ = im.shape
-
-        resize_w = w
-        resize_h = h
-
-        # limit the max side
-        if max(resize_h, resize_w) > max_side_len:
-            if resize_h > resize_w:
-                ratio = float(max_side_len) / resize_h
-            else:
-                ratio = float(max_side_len) / resize_w
-        else:
-            ratio = 1.
-        resize_h = int(resize_h * ratio)
-        resize_w = int(resize_w * ratio)
-        if resize_h % 32 == 0:
-            resize_h = resize_h
-        elif resize_h // 32 <= 1:
-            resize_h = 32
-        else:
-            resize_h = (resize_h // 32 - 1) * 32
-        if resize_w % 32 == 0:
-            resize_w = resize_w
-        elif resize_w // 32 <= 1:
-            resize_w = 32
-        else:
-            resize_w = (resize_w // 32 - 1) * 32
-        try:
-            if int(resize_w) <= 0 or int(resize_h) <= 0:
-                return None, (None, None)
-            im = cv2.resize(im, (int(resize_w), int(resize_h)))
-        except:
-            print(im.shape, resize_w, resize_h)
-            sys.exit(0)
-        ratio_h = resize_h / float(h)
-        ratio_w = resize_w / float(w)
-        return im, (ratio_h, ratio_w)
-
-    def resize_image_type1(self, im):
-        resize_h, resize_w = self.image_shape
-        ori_h, ori_w = im.shape[:2]  # (h, w, c)
-        im = cv2.resize(im, (int(resize_w), int(resize_h)))
-        ratio_h = float(resize_h) / ori_h
-        ratio_w = float(resize_w) / ori_w
-        return im, (ratio_h, ratio_w)
-
-    def normalize(self, im):
-        img_mean = [0.485, 0.456, 0.406]
-        img_std = [0.229, 0.224, 0.225]
-        im = im.astype(np.float32, copy=False)
-        im = im / 255
-        im[:, :, 0] -= img_mean[0]
-        im[:, :, 1] -= img_mean[1]
-        im[:, :, 2] -= img_mean[2]
-        im[:, :, 0] /= img_std[0]
-        im[:, :, 1] /= img_std[1]
-        im[:, :, 2] /= img_std[2]
-        channel_swap = (2, 0, 1)
-        im = im.transpose(channel_swap)
-        return im
-
-    def __call__(self, im):
-        if self.resize_type == 0:
-            im, (ratio_h, ratio_w) = self.resize_image_type0(im)
-        else:
-            im, (ratio_h, ratio_w) = self.resize_image_type1(im)
-        im = self.normalize(im)
-        im = im[np.newaxis, :]
-        return [im, (ratio_h, ratio_w)]
--- a/ppocr/data/det/east_process.py
+++ b/ppocr/data/det/east_process.py
--- a/ppocr/data/det/make_border_map.py
+++ b/ppocr/data/det/make_border_map.py
-# -*- coding:utf-8 -*- 
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import numpy as np
-import cv2
-np.seterr(divide='ignore', invalid='ignore')
-import pyclipper
-from shapely.geometry import Polygon
-import sys
-import warnings
-warnings.simplefilter("ignore")
-
-
-def draw_border_map(polygon, canvas, mask, shrink_ratio):
-    polygon = np.array(polygon)
-    assert polygon.ndim == 2
-    assert polygon.shape[1] == 2
-
-    polygon_shape = Polygon(polygon)
-    if polygon_shape.area <= 0:
-        return
-    distance = polygon_shape.area * (
-        1 - np.power(shrink_ratio, 2)) / polygon_shape.length
-    subject = [tuple(l) for l in polygon]
-    padding = pyclipper.PyclipperOffset()
-    padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
-
-    padded_polygon = np.array(padding.Execute(distance)[0])
-    cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)
-
-    xmin = padded_polygon[:, 0].min()
-    xmax = padded_polygon[:, 0].max()
-    ymin = padded_polygon[:, 1].min()
-    ymax = padded_polygon[:, 1].max()
-    width = xmax - xmin + 1
-    height = ymax - ymin + 1
-
-    polygon[:, 0] = polygon[:, 0] - xmin
-    polygon[:, 1] = polygon[:, 1] - ymin
-
-    xs = np.broadcast_to(
-        np.linspace(
-            0, width - 1, num=width).reshape(1, width), (height, width))
-    ys = np.broadcast_to(
-        np.linspace(
-            0, height - 1, num=height).reshape(height, 1), (height, width))
-
-    distance_map = np.zeros((polygon.shape[0], height, width), dtype=np.float32)
-    for i in range(polygon.shape[0]):
-        j = (i + 1) % polygon.shape[0]
-        absolute_distance = _distance(xs, ys, polygon[i], polygon[j])
-        distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
-    distance_map = distance_map.min(axis=0)
-
-    xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
-    xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
-    ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
-    ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
-    canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
-        1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
-                         xmin_valid - xmin:xmax_valid - xmax + width],
-        canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])
-
-
-def _distance(xs, ys, point_1, point_2):
-    '''
-    compute the distance from point to a line
-    ys: coordinates in the first axis
-    xs: coordinates in the second axis
-    point_1, point_2: (x, y), the end of the line
-    '''
-    height, width = xs.shape[:2]
-    square_distance_1 = np.square(xs - point_1[0]) + np.square(ys - point_1[1])
-    square_distance_2 = np.square(xs - point_2[0]) + np.square(ys - point_2[1])
-    square_distance = np.square(point_1[0] - point_2[0]) + np.square(point_1[
-        1] - point_2[1])
-
-    cosin = (square_distance - square_distance_1 - square_distance_2) / (
-        2 * np.sqrt(square_distance_1 * square_distance_2))
-    square_sin = 1 - np.square(cosin)
-    square_sin = np.nan_to_num(square_sin)
-    result = np.sqrt(square_distance_1 * square_distance_2 * square_sin /
-                     square_distance)
-
-    result[cosin <
-           0] = np.sqrt(np.fmin(square_distance_1, square_distance_2))[cosin <
-                                                                       0]
-    # self.extend_line(point_1, point_2, result)
-    return result
-
-
-def extend_line(point_1, point_2, result, shrink_ratio):
-    ex_point_1 = (
-        int(
-            round(point_1[0] + (point_1[0] - point_2[0]) * (1 + shrink_ratio))),
-        int(
-            round(point_1[1] + (point_1[1] - point_2[1]) * (1 + shrink_ratio))))
-    cv2.line(
-        result,
-        tuple(ex_point_1),
-        tuple(point_1),
-        4096.0,
-        1,
-        lineType=cv2.LINE_AA,
-        shift=0)
-    ex_point_2 = (
-        int(
-            round(point_2[0] + (point_2[0] - point_1[0]) * (1 + shrink_ratio))),
-        int(
-            round(point_2[1] + (point_2[1] - point_1[1]) * (1 + shrink_ratio))))
-    cv2.line(
-        result,
-        tuple(ex_point_2),
-        tuple(point_2),
-        4096.0,
-        1,
-        lineType=cv2.LINE_AA,
-        shift=0)
-    return ex_point_1, ex_point_2
-
-
-def MakeBorderMap(data):
-    shrink_ratio = 0.4
-    thresh_min = 0.3
-    thresh_max = 0.7
-
-    im = data['image']
-    text_polys = data['polys']
-    ignore_tags = data['ignore_tags']
-
-    canvas = np.zeros(im.shape[:2], dtype=np.float32)
-    mask = np.zeros(im.shape[:2], dtype=np.float32)
-
-    for i in range(len(text_polys)):
-        if ignore_tags[i]:
-            continue
-        draw_border_map(
-            text_polys[i], canvas, mask=mask, shrink_ratio=shrink_ratio)
-    canvas = canvas * (thresh_max - thresh_min) + thresh_min
-
-    data['threshold_map'] = canvas
-    data['threshold_mask'] = mask
-    return data
--- a/ppocr/data/det/make_shrink_map.py
+++ b/ppocr/data/det/make_shrink_map.py
-# -*- coding:utf-8 -*- 
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import numpy as np
-import cv2
-from shapely.geometry import Polygon
-import pyclipper
-
-
-def validate_polygons(polygons, ignore_tags, h, w):
-    '''
-    polygons (numpy.array, required): of shape (num_instances, num_points, 2)
-    '''
-    if len(polygons) == 0:
-        return polygons, ignore_tags
-    assert len(polygons) == len(ignore_tags)
-    for polygon in polygons:
-        polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1)
-        polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1)
-
-    for i in range(len(polygons)):
-        area = polygon_area(polygons[i])
-        if abs(area) < 1:
-            ignore_tags[i] = True
-        if area > 0:
-            polygons[i] = polygons[i][::-1, :]
-    return polygons, ignore_tags
-
-
-def polygon_area(polygon):
-    edge = 0
-    for i in range(polygon.shape[0]):
-        next_index = (i + 1) % polygon.shape[0]
-        edge += (polygon[next_index, 0] - polygon[i, 0]) * (
-            polygon[next_index, 1] - polygon[i, 1])
-
-    return edge / 2.
-
-
-def MakeShrinkMap(data):
-    min_text_size = 8
-    shrink_ratio = 0.4
-
-    image = data['image']
-    text_polys = data['polys']
-    ignore_tags = data['ignore_tags']
-
-    h, w = image.shape[:2]
-    text_polys, ignore_tags = validate_polygons(text_polys, ignore_tags, h, w)
-    gt = np.zeros((h, w), dtype=np.float32)
-    # gt = np.zeros((1, h, w), dtype=np.float32)
-    mask = np.ones((h, w), dtype=np.float32)
-    for i in range(len(text_polys)):
-        polygon = text_polys[i]
-        height = max(polygon[:, 1]) - min(polygon[:, 1])
-        width = max(polygon[:, 0]) - min(polygon[:, 0])
-        # height = min(np.linalg.norm(polygon[0] - polygon[3]),
-        #             np.linalg.norm(polygon[1] - polygon[2]))
-        # width = min(np.linalg.norm(polygon[0] - polygon[1]),
-        #             np.linalg.norm(polygon[2] - polygon[3]))
-        if ignore_tags[i] or min(height, width) < min_text_size:
-            cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0)
-            ignore_tags[i] = True
-        else:
-            polygon_shape = Polygon(polygon)
-            distance = polygon_shape.area * (
-                1 - np.power(shrink_ratio, 2)) / polygon_shape.length
-            subject = [tuple(l) for l in text_polys[i]]
-            padding = pyclipper.PyclipperOffset()
-            padding.AddPath(subject, pyclipper.JT_ROUND,
-                            pyclipper.ET_CLOSEDPOLYGON)
-            shrinked = padding.Execute(-distance)
-            if shrinked == []:
-                cv2.fillPoly(mask,
-                             polygon.astype(np.int32)[np.newaxis, :, :], 0)
-                ignore_tags[i] = True
-                continue
-            shrinked = np.array(shrinked[0]).reshape(-1, 2)
-            cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
-            # cv2.fillPoly(gt[0], [shrinked.astype(np.int32)], 1)
-
-    data['shrink_map'] = gt
-    data['shrink_mask'] = mask
-    return data
--- a/ppocr/data/det/sast_process.py
+++ b/ppocr/data/det/sast_process.py
--- a/ppocr/data/imaug/__init__.py
+++ b/ppocr/data/imaug/__init__.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from .iaa_augment import IaaAugment
+from .make_border_map import MakeBorderMap
+from .make_shrink_map import MakeShrinkMap
+from .random_crop_data import EastRandomCropData, PSERandomCrop
+
+from .rec_img_aug import RecAug, RecResizeImg
+
+from .operators import *
+from .label_ops import *
+
+
+def transform(data, ops=None):
+    """Run ``data`` through every operator in ``ops``, in order.
+
+    Each operator receives the result of the previous one. As soon as an
+    operator returns ``None`` the chain stops and ``None`` is returned.
+
+    Args:
+        data: the sample handed to the first operator
+        ops(list|None): callables applied in sequence; ``None`` means
+            "no operators"
+
+    Returns:
+        the output of the last operator, ``data`` itself when there are no
+        operators, or ``None`` if any operator returned ``None``
+    """
+    pipeline = [] if ops is None else ops
+    result = data
+    for apply_op in pipeline:
+        result = apply_op(result)
+        if result is None:
+            break
+    return result
+
+
+def create_operators(op_param_list, global_config=None):
+    """
+    create operators based on the config
+
+    Args:
+        op_param_list(list): a list of single-key dicts; the key is the name
+            of an operator class and the value is a dict of keyword
+            arguments for it (or None for "no arguments")
+        global_config(dict|None): extra keyword arguments handed to every
+            operator; on a key clash these override the per-operator values
+
+    Returns:
+        list: the instantiated operators, in the order of op_param_list
+    """
+    assert isinstance(op_param_list, list), ('operator config should be a list')
+    ops = []
+    for operator in op_param_list:
+        assert isinstance(operator,
+                          dict) and len(operator) == 1, "yaml format error"
+        op_name = list(operator)[0]
+        raw_param = operator[op_name]
+        # Work on a copy: updating the config's own dict would leak the
+        # global keys into the caller's config every time this runs.
+        param = {} if raw_param is None else dict(raw_param)
+        if global_config is not None:
+            param.update(global_config)
+        # NOTE(review): eval() executes whatever string the config supplies
+        # as the operator name; only load configs from trusted sources.
+        op = eval(op_name)(**param)
+        ops.append(op)
+    return ops
--- a/ppocr/data/imaug/iaa_augment.py
+++ b/ppocr/data/imaug/iaa_augment.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import imgaug
+import imgaug.augmenters as iaa
+
+
+class AugmenterBuilder(object):
+    """Turns a plain list/dict description into ``imgaug.augmenters`` objects."""
+
+    def __init__(self):
+        pass
+
+    def build(self, args, root=True):
+        """Build an augmenter from ``args``.
+
+        Args:
+            args: ``None`` or an empty container (nothing to build), a dict
+                with keys ``'type'`` and ``'args'``, or a list. A list at
+                the root is a sequence of child descriptions; a nested list
+                is ``[augmenter_name, positional_arg, ...]``.
+            root(bool): whether ``args`` is the top-level description
+
+        Returns:
+            the constructed augmenter, or ``None`` for ``None``/empty input
+
+        Raises:
+            RuntimeError: ``args`` is non-empty but neither a list nor a dict
+        """
+        if args is None or len(args) == 0:
+            return None
+
+        if isinstance(args, dict):
+            augmenter_cls = getattr(iaa, args['type'])
+            keyword_args = {}
+            for key, value in args['args'].items():
+                keyword_args[key] = self.to_tuple_if_list(value)
+            return augmenter_cls(**keyword_args)
+
+        if isinstance(args, list):
+            if not root:
+                # nested list: first entry names the augmenter, the rest
+                # are its positional arguments
+                factory = getattr(iaa, args[0])
+                positional = [self.to_tuple_if_list(a) for a in args[1:]]
+                return factory(*positional)
+            children = [self.build(item, root=False) for item in args]
+            return iaa.Sequential(children)
+
+        raise RuntimeError('unknown augmenter arg: ' + str(args))
+
+    def to_tuple_if_list(self, obj):
+        """Return ``tuple(obj)`` when ``obj`` is a list, otherwise ``obj``."""
+        return tuple(obj) if isinstance(obj, list) else obj
+
+
+class IaaAugment():
+    """Applies an imgaug pipeline to ``data['image']`` and ``data['polys']``."""
+
+    def __init__(self, augmenter_args=None, **kwargs):
+        """
+        Args:
+            augmenter_args(list|None): description understood by
+                AugmenterBuilder.build; None selects the default
+                Fliplr / Affine / Resize pipeline below
+            **kwargs: accepted and ignored
+        """
+        if augmenter_args is None:
+            augmenter_args = [
+                {'type': 'Fliplr', 'args': {'p': 0.5}},
+                {'type': 'Affine', 'args': {'rotate': [-10, 10]}},
+                {'type': 'Resize', 'args': {'size': [0.5, 3]}},
+            ]
+        self.augmenter = AugmenterBuilder().build(augmenter_args)
+
+    def __call__(self, data):
+        """Augment the image and its polygons; returns the same dict.
+
+        When ``self.augmenter`` is falsy the dict is returned untouched.
+        """
+        image = data['image']
+        original_shape = image.shape
+
+        if not self.augmenter:
+            return data
+
+        # One deterministic copy is used for both the image and the
+        # polygons (presumably so both see the same sampled transform --
+        # see the imgaug docs).
+        aug = self.augmenter.to_deterministic()
+        data['image'] = aug.augment_image(image)
+        return self.may_augment_annotation(aug, data, original_shape)
+
+    def may_augment_annotation(self, aug, data, shape):
+        """Replace ``data['polys']`` with the augmented polygons (no-op if ``aug`` is None)."""
+        if aug is None:
+            return data
+
+        data['polys'] = np.array(
+            [self.may_augment_poly(aug, shape, poly) for poly in data['polys']])
+        return data
+
+    def may_augment_poly(self, aug, img_shape, poly):
+        """Push the points of one polygon through ``aug``; returns a list of (x, y)."""
+        points = [imgaug.Keypoint(pt[0], pt[1]) for pt in poly]
+        on_image = imgaug.KeypointsOnImage(points, shape=img_shape)
+        moved = aug.augment_keypoints([on_image])[0].keypoints
+        return [(kp.x, kp.y) for kp in moved]
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+from ppocr.utils.logging import get_logger
+
+
+class DetLabelEncode(object):
+    """Decode a detection label (a JSON string) into polygon arrays."""
+
+    def __init__(self, **kwargs):
+        pass
+
+    def __call__(self, data):
+        """
+        Parse data['label'] and add 'polys', 'texts' and 'ignore_tags'.
+
+        Args:
+            data(dict): must hold 'label', a JSON list of objects with the
+                keys 'points' and 'transcription'
+
+        Returns:
+            dict: the same dict with
+                'polys': float32 array built from every 'points' entry
+                'texts': list of the transcriptions
+                'ignore_tags': bool array, True where the transcription is
+                    '*' or '###'
+        """
+        import json
+        label = json.loads(data['label'])
+        boxes, txts, txt_tags = [], [], []
+        for item in label:
+            box = item['points']
+            txt = item['transcription']
+            boxes.append(box)
+            txts.append(txt)
+            txt_tags.append(txt in ['*', '###'])
+        boxes = np.array(boxes, dtype=np.float32)
+        # np.bool was only an alias of the builtin and no longer exists in
+        # NumPy >= 1.24; the builtin bool selects the same dtype.
+        txt_tags = np.array(txt_tags, dtype=bool)
+
+        data['polys'] = boxes
+        data['texts'] = txts
+        data['ignore_tags'] = txt_tags
+        return data
+
+    def order_points_clockwise(self, pts):
+        """
+        Reorder four 2-D points into a (4, 2) float32 array.
+
+        Slot 0 gets the point with the smallest x + y, slot 2 the largest
+        x + y, slot 1 the smallest y - x and slot 3 the largest y - x
+        (presumably top-left, top-right, bottom-right, bottom-left in image
+        coordinates -- confirm against callers).
+        """
+        rect = np.zeros((4, 2), dtype="float32")
+        s = pts.sum(axis=1)
+        rect[0] = pts[np.argmin(s)]
+        rect[2] = pts[np.argmax(s)]
+        diff = np.diff(pts, axis=1)
+        rect[1] = pts[np.argmin(diff)]
+        rect[3] = pts[np.argmax(diff)]
+        return rect
+
+
+class BaseRecLabelEncode(object):
+    """ Convert between text-label and text-index """
+
+    def __init__(self,
+                 max_text_length,
+                 character_dict_path=None,
+                 character_type='ch',
+                 use_space_char=False):
+        """
+        Build the character table used by encode().
+
+        Args:
+            max_text_length(int): longest label encode() accepts
+            character_dict_path(str|None): dictionary file, one entry per
+                line; required when character_type is 'ch'
+            character_type(str): one of 'ch', 'en', 'en_sensitive'
+            use_space_char(bool): append ' ' to the 'ch' character set
+        """
+        support_character_type = ['ch', 'en', 'en_sensitive']
+        # Report the offending character_type. self.character_str is not
+        # assigned yet at this point, so formatting it here would raise
+        # AttributeError instead of showing this message.
+        assert character_type in support_character_type, "Only {} are supported now but get {}".format(
+            support_character_type, character_type)
+
+        self.max_text_len = max_text_length
+        if character_type == "en":
+            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
+            dict_character = list(self.character_str)
+        elif character_type == "ch":
+            self.character_str = ""
+            assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch"
+            with open(character_dict_path, "rb") as fin:
+                lines = fin.readlines()
+                for line in lines:
+                    line = line.decode('utf-8').strip("\n").strip("\r\n")
+                    self.character_str += line
+            if use_space_char:
+                self.character_str += " "
+            dict_character = list(self.character_str)
+        elif character_type == "en_sensitive":
+            # same with ASTER setting (use 94 char).
+            import string
+            self.character_str = string.printable[:-6]
+            dict_character = list(self.character_str)
+        self.character_type = character_type
+        # subclasses hook in their special tokens here, before indexing
+        dict_character = self.add_special_char(dict_character)
+        self.dict = {}
+        for i, char in enumerate(dict_character):
+            self.dict[char] = i
+        self.character = dict_character
+
+    def add_special_char(self, dict_character):
+        """Hook for subclasses to add special tokens; the base returns the list unchanged."""
+        return dict_character
+
+    def encode(self, text):
+        """convert one text label into a list of character indices.
+
+        Args:
+            text(str): label of a single image; lower-cased first when
+                character_type is 'en'
+
+        Returns:
+            list|None: the index of every character found in the
+                dictionary (characters not in the dictionary are dropped).
+                None when the label is longer than max_text_len or when no
+                character of it is in the dictionary.
+        """
+        if len(text) > self.max_text_len:
+            return None
+        if self.character_type == "en":
+            text = text.lower()
+        text_list = []
+        for char in text:
+            if char not in self.dict:
+                # logger = get_logger()
+                # logger.warning('{} is not in dict'.format(char))
+                continue
+            text_list.append(self.dict[char])
+        if len(text_list) == 0:
+            return None
+        return text_list
+
+    def get_ignored_tokens(self):
+        return [0]  # for ctc blank
+
+
+class CTCLabelEncode(BaseRecLabelEncode):
+    """ Label encoder that reserves index 0 for the 'blank' entry """
+
+    def __init__(self,
+                 max_text_length,
+                 character_dict_path=None,
+                 character_type='ch',
+                 use_space_char=False,
+                 **kwargs):
+        # extra keyword arguments are accepted and ignored
+        super(CTCLabelEncode, self).__init__(
+            max_text_length=max_text_length,
+            character_dict_path=character_dict_path,
+            character_type=character_type,
+            use_space_char=use_space_char)
+
+    def __call__(self, data):
+        """
+        Encode data['label'] in place.
+
+        Returns None when encode() rejects the label; otherwise the same
+        dict with 'length' set to the number of encoded characters and
+        'label' set to the indices padded with 0 up to max_text_len.
+        """
+        encoded = self.encode(data['label'])
+        if encoded is None:
+            return None
+        pad_count = self.max_text_len - len(encoded)
+        data['length'] = np.array(len(encoded))
+        data['label'] = np.array(encoded + [0] * pad_count)
+        return data
+
+    def add_special_char(self, dict_character):
+        """Put 'blank' in front of the character list so it gets index 0."""
+        return ['blank'] + dict_character
+
+
+class AttnLabelEncode(BaseRecLabelEncode):
+    """ Convert between text-label and text-index """
+
+    def __init__(self,
+                 max_text_length,
+                 character_dict_path=None,
+                 character_type='ch',
+                 use_space_char=False,
+                 **kwargs):
+        # The base __init__ calls add_special_char(), which reads these two
+        # attributes, so they must exist before super().__init__ runs;
+        # assigning them afterwards makes construction fail with
+        # AttributeError.
+        self.beg_str = "sos"
+        self.end_str = "eos"
+        super(AttnLabelEncode,
+              self).__init__(max_text_length, character_dict_path,
+                             character_type, use_space_char)
+
+    def add_special_char(self, dict_character):
+        """Put the begin and end tokens in front of the character list."""
+        dict_character = [self.beg_str, self.end_str] + dict_character
+        return dict_character
+
+    def __call__(self, text):
+        """Encode a label string; returns what encode() returns (list or None)."""
+        text = self.encode(text)
+        return text
+
+    def get_ignored_tokens(self):
+        """Return the indices of the begin and end tokens."""
+        beg_idx = self.get_beg_end_flag_idx("beg")
+        end_idx = self.get_beg_end_flag_idx("end")
+        return [beg_idx, end_idx]
+
+    def get_beg_end_flag_idx(self, beg_or_end):
+        """
+        Look up the index of the begin ("beg") or end ("end") token.
+
+        Returns the index wrapped in a 0-d numpy array; any other argument
+        trips the assertion below.
+        """
+        if beg_or_end == "beg":
+            idx = np.array(self.dict[self.beg_str])
+        elif beg_or_end == "end":
+            idx = np.array(self.dict[self.end_str])
+        else:
+            assert False, "Unsupport type %s in get_beg_end_flag_idx" \
+                          % beg_or_end
+        return idx
--- a/ppocr/data/imaug/make_border_map.py
+++ b/ppocr/data/imaug/make_border_map.py
+# -*- coding:utf-8 -*- 
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import cv2
+
+np.seterr(divide='ignore', invalid='ignore')
+import pyclipper
+from shapely.geometry import Polygon
+import sys
+import warnings
+
+warnings.simplefilter("ignore")
+
+__all__ = ['MakeBorderMap']
+
+
+class MakeBorderMap(object):
+    def __init__(self,
+                 shrink_ratio=0.4,
+                 thresh_min=0.3,
+                 thresh_max=0.7,
+                 **kwargs):
+        """
+        Args:
+            shrink_ratio(float): used by draw_border_map to derive the
+                polygon offset distance
+            thresh_min(float): value the threshold map takes where the
+                canvas is 0
+            thresh_max(float): value the threshold map takes where the
+                canvas is 1
+            **kwargs: accepted and ignored
+        """
+        self.thresh_min, self.thresh_max = thresh_min, thresh_max
+        self.shrink_ratio = shrink_ratio
+
+    def __call__(self, data: dict) -> dict:
+        """
+        Add 'threshold_map' and 'threshold_mask' to data.
+
+        Reads data['image'] (only its first two dimensions), data['polys']
+        and data['ignore_tags']. Every polygon whose ignore tag is falsy is
+        drawn with draw_border_map; the canvas is then rescaled linearly so
+        0 maps to thresh_min and 1 maps to thresh_max.
+        """
+        map_size = data['image'].shape[:2]
+        polygons = data['polys']
+        skip_flags = data['ignore_tags']
+
+        border = np.zeros(map_size, dtype=np.float32)
+        region = np.zeros(map_size, dtype=np.float32)
+
+        for idx in range(len(polygons)):
+            if not skip_flags[idx]:
+                self.draw_border_map(polygons[idx], border, mask=region)
+
+        span = self.thresh_max - self.thresh_min
+        border = border * span + self.thresh_min
+
+        data['threshold_map'] = border
+        data['threshold_mask'] = region
+        return data
+
+    def draw_border_map(self, polygon, canvas, mask):
+        """
+        Draw the border map of one polygon into canvas and mask (in place).
+
+        Args:
+            polygon: points convertible to an (N, 2) array of (x, y)
+            canvas: 2-D array; receives max(existing, 1 - normalized
+                distance to the nearest polygon edge) inside the bounding
+                box of the dilated polygon
+            mask: 2-D array; the dilated polygon is filled with 1.0
+
+        Returns nothing; polygons with non-positive area are skipped.
+        """
+        # np.array() makes a copy, so the shifts below do not touch the
+        # caller's polygon
+        polygon = np.array(polygon)
+        assert polygon.ndim == 2
+        assert polygon.shape[1] == 2
+
+        polygon_shape = Polygon(polygon)
+        if polygon_shape.area <= 0:
+            return
+        # offset distance: area * (1 - shrink_ratio^2) / perimeter
+        distance = polygon_shape.area * (
+            1 - np.power(self.shrink_ratio, 2)) / polygon_shape.length
+        subject = [tuple(l) for l in polygon]
+        padding = pyclipper.PyclipperOffset()
+        padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+
+        # grow the polygon outwards by `distance`; only the first resulting
+        # path is used
+        padded_polygon = np.array(padding.Execute(distance)[0])
+        cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)
+
+        # bounding box of the dilated polygon
+        # NOTE(review): the slicing and linspace calls below need integer
+        # bounds -- presumably pyclipper returns integer coordinates; confirm
+        xmin = padded_polygon[:, 0].min()
+        xmax = padded_polygon[:, 0].max()
+        ymin = padded_polygon[:, 1].min()
+        ymax = padded_polygon[:, 1].max()
+        width = xmax - xmin + 1
+        height = ymax - ymin + 1
+
+        # move the polygon into bounding-box-local coordinates
+        polygon[:, 0] = polygon[:, 0] - xmin
+        polygon[:, 1] = polygon[:, 1] - ymin
+
+        # xs / ys hold the local x / y coordinate of every pixel in the box
+        xs = np.broadcast_to(
+            np.linspace(
+                0, width - 1, num=width).reshape(1, width), (height, width))
+        ys = np.broadcast_to(
+            np.linspace(
+                0, height - 1, num=height).reshape(height, 1), (height, width))
+
+        # one layer per edge (point i -> point i + 1, wrapping around):
+        # distance to that edge divided by `distance`, clipped to [0, 1]
+        distance_map = np.zeros(
+            (polygon.shape[0], height, width), dtype=np.float32)
+        for i in range(polygon.shape[0]):
+            j = (i + 1) % polygon.shape[0]
+            absolute_distance = self._distance(xs, ys, polygon[i], polygon[j])
+            distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
+        # keep the distance to the nearest edge
+        distance_map = distance_map.min(axis=0)
+
+        # clamp the bounding box to the canvas
+        xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
+        xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
+        ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
+        ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
+        # merge with what is already on the canvas, keeping the larger value;
+        # the distance_map slice is the part of the box that lies inside
+        # the canvas
+        canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
+            1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
+                             xmin_valid - xmin:xmax_valid - xmax + width],
+            canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])
+
+    def _distance(self, xs, ys, point_1, point_2):
+        '''
+        Per-pixel distance from a coordinate grid to the line through
+        point_1 and point_2.
+
+        xs: coordinates along the second axis
+        ys: coordinates along the first axis
+        point_1, point_2: (x, y), the two ends of the line
+
+        Where the intermediate `cosin` term is negative, the value is
+        replaced by the distance to the nearer of the two end points.
+        '''
+        # unused, but the unpacking fails unless xs has at least 2 dimensions
+        _rows, _cols = xs.shape[:2]
+
+        x1, y1 = point_1[0], point_1[1]
+        x2, y2 = point_2[0], point_2[1]
+
+        sq_to_1 = np.square(xs - x1) + np.square(ys - y1)
+        sq_to_2 = np.square(xs - x2) + np.square(ys - y2)
+        sq_edge = np.square(x1 - x2) + np.square(y1 - y2)
+
+        sq_product = sq_to_1 * sq_to_2
+        cosin = (sq_edge - sq_to_1 - sq_to_2) / (2 * np.sqrt(sq_product))
+        # NaNs (e.g. from a 0/0 where a pixel coincides with an end point)
+        # are turned into 0 here
+        square_sin = np.nan_to_num(1 - np.square(cosin))
+        result = np.sqrt(sq_product * square_sin / sq_edge)
+
+        use_endpoint = cosin < 0
+        nearest_end = np.sqrt(np.fmin(sq_to_1, sq_to_2))
+        result[use_endpoint] = nearest_end[use_endpoint]
+        return result
+
+    def extend_line(self, point_1, point_2, result, shrink_ratio):
+        '''
+        Draw an extension of the line point_1 -> point_2 beyond each end.
+
+        For each end point, an extended point is computed by moving away from
+        the other end by (1 + shrink_ratio) times the end-to-end offset and
+        rounding to integers. A line with value 4096.0 and thickness 1 is
+        drawn into `result` from the extended point back to the end point.
+
+        Returns the two extended points as (ex_point_1, ex_point_2).
+        '''
+        scale = 1 + shrink_ratio
+
+        def _extended(anchor, other):
+            # push `anchor` further away from `other`, rounded to ints
+            return (int(round(anchor[0] + (anchor[0] - other[0]) * scale)),
+                    int(round(anchor[1] + (anchor[1] - other[1]) * scale)))
+
+        def _draw(far_end, near_end):
+            cv2.line(
+                result,
+                tuple(far_end),
+                tuple(near_end),
+                4096.0,
+                1,
+                lineType=cv2.LINE_AA,
+                shift=0)
+
+        ex_point_1 = _extended(point_1, point_2)
+        _draw(ex_point_1, point_1)
+        ex_point_2 = _extended(point_2, point_1)
+        _draw(ex_point_2, point_2)
+        return ex_point_1, ex_point_2
--- a/ppocr/data/imaug/make_shrink_map.py
+++ b/ppocr/data/imaug/make_shrink_map.py
+# -*- coding:utf-8 -*- 
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import cv2
+from shapely.geometry import Polygon
+import pyclipper
+
+__all__ = ['MakeShrinkMap']
+
+
+class MakeShrinkMap(object):
+    r'''
+    Making binary mask from detection data with ICDAR format.
+    Typically following the process of class `MakeICDARData`.
+    '''
+
+    def __init__(self, min_text_size=8, shrink_ratio=0.4, **kwargs):
+        # min_text_size: polygons whose shorter bounding-box side is below
+        #     this value are treated as ignored by __call__
+        # shrink_ratio: enters the shrink offset as (1 - shrink_ratio ** 2)
+        # any extra keyword arguments are accepted and ignored
+        self.shrink_ratio = shrink_ratio
+        self.min_text_size = min_text_size
+
+    def __call__(self, data):
+        '''
+        Build the shrink map and its mask for one sample.
+
+        Reads data['image'], data['polys'] and data['ignore_tags']; the
+        polygons are first passed through `validate_polygons`. The result is
+        stored under data['shrink_map'] (1 inside each shrunk polygon, 0
+        elsewhere) and data['shrink_mask'] (0 over polygons that are ignored,
+        too small, or vanish when shrunk; 1 elsewhere). The same dict object
+        is returned.
+        '''
+        image = data['image']
+        text_polys = data['polys']
+        ignore_tags = data['ignore_tags']
+
+        img_h, img_w = image.shape[:2]
+        text_polys, ignore_tags = self.validate_polygons(
+            text_polys, ignore_tags, img_h, img_w)
+        shrink_map = np.zeros((img_h, img_w), dtype=np.float32)
+        shrink_mask = np.ones((img_h, img_w), dtype=np.float32)
+
+        def _exclude(index, poly):
+            # zero the polygon's region in the mask and flag it as ignored
+            cv2.fillPoly(shrink_mask,
+                         poly.astype(np.int32)[np.newaxis, :, :], 0)
+            ignore_tags[index] = True
+
+        for idx, poly in enumerate(text_polys):
+            box_h = max(poly[:, 1]) - min(poly[:, 1])
+            box_w = max(poly[:, 0]) - min(poly[:, 0])
+            if ignore_tags[idx] or min(box_h, box_w) < self.min_text_size:
+                _exclude(idx, poly)
+                continue
+
+            # inward offset derived from area, perimeter and shrink ratio
+            geom = Polygon(poly)
+            distance = geom.area * (
+                1 - np.power(self.shrink_ratio, 2)) / geom.length
+            offsetter = pyclipper.PyclipperOffset()
+            offsetter.AddPath([tuple(pt) for pt in poly], pyclipper.JT_ROUND,
+                              pyclipper.ET_CLOSEDPOLYGON)
+            shrunk = offsetter.Execute(-distance)
+            if shrunk == []:
+                # nothing is left after shrinking
+                _exclude(idx, poly)
+                continue
+
+            # only the first path returned by the offset operation is drawn
+            shrunk = np.array(shrunk[0]).reshape(-1, 2)
+            cv2.fillPoly(shrink_map, [shrunk.astype(np.int32)], 1)
+
+        data['shrink_map'] = shrink_map
+        data['shrink_mask'] = shrink_mask
+        return data
+
+    def validate_polygons(self, polygons, ignore_tags, h, w):
+        '''
+        Clip polygons to the image and normalize them, in place.
+
+        polygons (numpy.array, required): of shape (num_instances, num_points, 2)
+        ignore_tags: one flag per polygon
+        h, w: image height and width; x is clipped to [0, w - 1] and y to
+            [0, h - 1]
+
+        A polygon whose absolute area (per `polygon_area`) is below 1 gets
+        its tag set to True; a polygon with positive area has its point
+        order reversed. Returns (polygons, ignore_tags).
+        '''
+        if len(polygons) == 0:
+            return polygons, ignore_tags
+        assert len(polygons) == len(ignore_tags)
+
+        max_x = w - 1
+        max_y = h - 1
+        for poly in polygons:
+            poly[:, 0] = np.clip(poly[:, 0], 0, max_x)
+            poly[:, 1] = np.clip(poly[:, 1], 0, max_y)
+
+        for idx in range(len(polygons)):
+            signed_area = self.polygon_area(polygons[idx])
+            if abs(signed_area) < 1:
+                ignore_tags[idx] = True
+            if signed_area > 0:
+                polygons[idx] = polygons[idx][::-1, :]
+        return polygons, ignore_tags
+
+    def polygon_area(self, polygon):
+        '''
+        Signed area of a polygon via the trapezoid (shoelace) formula.
+
+        polygon (numpy.array, required): of shape (num_points, 2), with
+            columns (x, y).
+
+        Returns half the sum over all edges of (x_next - x) * (y_next + y).
+        The sign depends on the vertex order: the square
+        (0, 0), (10, 0), (10, 10), (0, 10) gives -100 and the reversed order
+        gives +100.
+
+        The y terms must be added, not subtracted. With a difference, every
+        axis-aligned edge contributes zero, so an axis-aligned rectangle
+        would report an area of 0.
+        '''
+        xs = polygon[:, 0]
+        ys = polygon[:, 1]
+        # np.roll(..., -1) pairs each vertex with its successor, wrapping the
+        # last vertex around to the first
+        edge_sum = np.sum((np.roll(xs, -1) - xs) * (np.roll(ys, -1) + ys))
+        return edge_sum / 2.