Merge pull request #5 from PaddlePaddle/develop

merge paddleocr

Merge pull request #5 from PaddlePaddle/develop
merge paddleocr
ee05c913 · zhoujun · GitHub · 7c09c97d · 2bdaea56 · ee05c913
Unverified Commit ee05c913 authored Aug 27, 2020 by zhoujun Committed by GitHub Aug 27, 2020
20 changed files
--- a/doc/doc_en/whl_en.md
+++ b/doc/doc_en/whl_en.md
+# paddleocr package
+
+## Get started quickly
+### install package
+install by pypi
+```bash
+pip install paddleocr
+```
+
+build your own whl package and install it
+```bash
+python setup.py bdist_wheel
+pip install dist/paddleocr-0.0.3-py3-none-any.whl
+```
+### 1. Use by code
+
+* detection and recognition
+```python
+from paddleocr import PaddleOCR,draw_ocr
+ocr = PaddleOCR() # need to run only once to download and load model into memory
+img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
+result = ocr.ocr(img_path)
+for line in result:
+    print(line)
+
+# draw result
+from PIL import Image
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in result]
+txts = [line[1][0] for line in result]
+scores = [line[1][1] for line in result]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.save('result.jpg')
+```
+
+Output will be a list, each item contains bounding box, text and recognition confidence
+```bash
+[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]]
+[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]]
+[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]]
+......
+```
+
+Visualization of results
+
+<div align="center">
+    <img src="../imgs_results/whl/12_det_rec.jpg" width="800">
+</div>
+
+* only detection
+```python
+from paddleocr import PaddleOCR,draw_ocr
+ocr = PaddleOCR() # need to run only once to download and load model into memory
+img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
+result = ocr.ocr(img_path,rec=False)
+for line in result:
+    print(line)
+
+# draw result
+from PIL import Image
+
+image = Image.open(img_path).convert('RGB')
+im_show = draw_ocr(image, result, txts=None, scores=None, font_path='/path/to/PaddleOCR/doc/simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.save('result.jpg')
+```
+
+Output will be a list, each item only contains bounding box
+```bash
+[[756.0, 812.0], [805.0, 812.0], [805.0, 830.0], [756.0, 830.0]]
+[[820.0, 803.0], [1085.0, 801.0], [1085.0, 836.0], [820.0, 838.0]]
+[[393.0, 801.0], [715.0, 805.0], [715.0, 839.0], [393.0, 836.0]]
+......
+```
+
+Visualization of results
+
+<div align="center">
+    <img src="../imgs_results/whl/12_det.jpg" width="800">
+</div>
+
+* only recognition
+```python
+from paddleocr import PaddleOCR
+ocr = PaddleOCR() # need to run only once to load model into memory
+img_path = 'PaddleOCR/doc/imgs_words_en/word_10.png'
+result = ocr.ocr(img_path,det=False)
+for line in result:
+    print(line)
+```
+
+Output will be a list, each item contains text and recognition confidence
+```bash
+['PAIN', 0.990372]
+```
+
+### 2. Use by command line
+
+show help information
+```bash
+paddleocr -h
+```
+
+* detection and recognition
+```bash
+paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg
+```
+
+Output will be a list, each item contains bounding box, text and recognition confidence
+```bash
+[[[442.0, 173.0], [1169.0, 173.0], [1169.0, 225.0], [442.0, 225.0]], ['ACKNOWLEDGEMENTS', 0.99283075]]
+[[[393.0, 340.0], [1207.0, 342.0], [1207.0, 389.0], [393.0, 387.0]], ['We would like to thank all the designers and', 0.9357758]]
+[[[399.0, 398.0], [1204.0, 398.0], [1204.0, 433.0], [399.0, 433.0]], ['contributors whohave been involved in the', 0.9592447]]
+......
+```
+
+* only detection
+```bash
+paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --rec false
+```
+
+Output will be a list, each item only contains bounding box
+```bash
+[[756.0, 812.0], [805.0, 812.0], [805.0, 830.0], [756.0, 830.0]]
+[[820.0, 803.0], [1085.0, 801.0], [1085.0, 836.0], [820.0, 838.0]]
+[[393.0, 801.0], [715.0, 805.0], [715.0, 839.0], [393.0, 836.0]]
+......
+```
+
+* only recognition
+```bash
+paddleocr --image_dir PaddleOCR/doc/imgs_words_en/word_10.png --det false
+```
+
+Output will be a list, each item contains text and recognition confidence
+```bash
+['PAIN', 0.990372]
+```
+
+## Use custom model
+When the built-in model cannot meet the needs, you need to use your own trained model.
+First, refer to the first section of [inference_en.md](./inference_en.md) to convert your det and rec model to inference model, and then use it as follows
+
+### 1. Use by code
+
+```python
+from paddleocr import PaddleOCR,draw_ocr
+# The path of detection and recognition model must contain model and params files
+ocr = PaddleOCR(det_model_dir='{your_det_model_dir}',rec_model_dir='{your_rec_model_dir}')
+img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
+result = ocr.ocr(img_path)
+for line in result:
+    print(line)
+
+# draw result
+from PIL import Image
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in result]
+txts = [line[1][0] for line in result]
+scores = [line[1][1] for line in result]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.save('result.jpg')
+```
+
+### 2. Use by command line
+
+```bash
+paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir}
+```
+
+## Parameter Description
+
+| Parameter                    | Description                                                                                                                                                                                                                 | Default value                  |
+|-------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------|
+| use_gpu                 | use GPU or not                                                                                                                                                                                                          | TRUE                    |
+| gpu_mem                 | GPU memory size used for initialization                                                                                                                                                                                              | 8000M                   |
+| image_dir               | The images path or folder path for predicting when used by the command line                                                                                                                                                                           |                         |
+| det_algorithm           | Type of detection algorithm selected                                                                                                                                                                                                   | DB                      |
+| det_model_dir           | the text detection inference model folder. There are two ways to transfer parameters, 1. None: Automatically download the built-in model to `~/.paddleocr/det`; 2. The path of the inference model converted by yourself, the model and params files must be included in the model path | None           |
+| det_max_side_len        | The maximum size of the long side of the image. When the long side exceeds this value, the long side will be resized to this size, and the short side will be scaled proportionally                                                                                                                         | 960                     |
+| det_db_thresh           | Binarization threshold value of DB output map                                                                                                                                                                                        | 0.3                     |
+| det_db_box_thresh       | The threshold value of the DB output box. Boxes score lower than this value will be discarded                                                                                                                                                                         | 0.5                     |
+| det_db_unclip_ratio     | The expanded ratio of DB output box                                                                                                                                                                                             | 2                       |
+| det_east_score_thresh   | Binarization threshold value of EAST output map                                                                                                                                                                                       | 0.8                     |
+| det_east_cover_thresh   | The threshold value of the EAST output box. Boxes score lower than this value will be discarded                                                                                                                                                                         | 0.1                     |
+| det_east_nms_thresh     | The NMS threshold value of EAST model output box                                                                                                                                                                                              | 0.2                     |
+| rec_algorithm           | Type of recognition algorithm selected                                                                                                                                                                                                | CRNN                    |
+| rec_model_dir           | the text recognition inference model folder. There are two ways to transfer parameters, 1. None: Automatically download the built-in model to `~/.paddleocr/rec`; 2. The path of the inference model converted by yourself, the model and params files must be included in the model path | None |
+| rec_image_shape         | image shape of recognition algorithm                                                                                                                                                                                            | "3,32,320"              |
+| rec_char_type           | Character type of recognition algorithm, Chinese (ch) or English (en)                                                                                                                                                                               | ch                      |
+| rec_batch_num           | When performing recognition, the batchsize of forward images                                                                                                                                                                                         | 30                      |
+| max_text_length         | The maximum text length that the recognition algorithm can recognize                                                                                                                                                                                         | 25                      |
+| rec_char_dict_path      | the alphabet path, which needs to be changed to your own dictionary path when `rec_model_dir` uses mode 2 (an inference model converted by yourself)                                                                                              | ./ppocr/utils/ppocr_keys_v1.txt                        |
+| use_space_char          | Whether to recognize spaces                                                                                                                                                                                                         | TRUE                    |
+| enable_mkldnn           | Whether to enable mkldnn                                                                                                                                                                                                       | FALSE                   |
+| det                     | Enable detection when the `ppocr.ocr` function is executed                                                                                                                                                                                   | TRUE                    |
+| rec                     | Enable recognition when the `ppocr.ocr` function is executed                                                                                                                                                                                 | TRUE                    |
--- a/doc/imgs_en/img623.jpg
+++ b/doc/imgs_en/img623.jpg
--- a/doc/imgs_results/det_res_img623_sast.jpg
+++ b/doc/imgs_results/det_res_img623_sast.jpg
--- a/doc/imgs_results/det_res_img_10_sast.jpg
+++ b/doc/imgs_results/det_res_img_10_sast.jpg
--- a/doc/imgs_results/whl/11_det.jpg
+++ b/doc/imgs_results/whl/11_det.jpg
--- a/doc/imgs_results/whl/11_det_rec.jpg
+++ b/doc/imgs_results/whl/11_det_rec.jpg
--- a/doc/imgs_results/whl/12_det.jpg
+++ b/doc/imgs_results/whl/12_det.jpg
--- a/doc/imgs_results/whl/12_det_rec.jpg
+++ b/doc/imgs_results/whl/12_det_rec.jpg
--- a/paddleocr.py
+++ b/paddleocr.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+
+__dir__ = os.path.dirname(__file__)
+sys.path.append(os.path.join(__dir__, ''))
+
+import cv2
+import numpy as np
+from pathlib import Path
+import tarfile
+import requests
+from tqdm import tqdm
+
+from tools.infer import predict_system
+from ppocr.utils.utility import initial_logger
+
+logger = initial_logger()
+from ppocr.utils.utility import check_and_read_gif, get_image_file_list
+
+__all__ = ['PaddleOCR']
+
+# Download URLs of the built-in inference model archives, keyed by the role
+# of the model ('det' = text detection, 'rec' = text recognition).
+model_params = dict(
+    det='https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db_infer.tar',
+    rec='https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_enhance_infer.tar',
+)
+
+# Algorithm names accepted for `det_algorithm` / `rec_algorithm`.
+SUPPORT_DET_MODEL = ['DB']
+SUPPORT_REC_MODEL = ['CRNN']
+
+# Directory under the user's home that holds the built-in models.
+BASE_DIR = os.path.expanduser("~/.paddleocr/")
+
+
+def download_with_progressbar(url, save_path):
+    """
+    Stream the file at `url` into `save_path` while showing a progress bar.
+    args:
+        url: address of the file to download
+        save_path: local file path the response body is written to
+    raises:
+        requests.HTTPError: if the server answers with an error status;
+            nothing is written to `save_path` in that case
+    The process exits with status 1 when the server announced a
+    `content-length` and the number of bytes received differs from it.
+    """
+    block_size = 1024  # 1 Kibibyte
+    # the context manager releases the connection even if writing fails
+    with requests.get(url, stream=True) as response:
+        # fail early instead of saving an HTML error page as the archive
+        response.raise_for_status()
+        total_size_in_bytes = int(response.headers.get('content-length', 0))
+        progress_bar = tqdm(
+            total=total_size_in_bytes, unit='iB', unit_scale=True)
+        try:
+            with open(save_path, 'wb') as file:
+                for data in response.iter_content(block_size):
+                    progress_bar.update(len(data))
+                    file.write(data)
+        finally:
+            progress_bar.close()
+    # a total of 0 means the server sent no content-length, so nothing
+    # can be verified
+    if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
+        logger.error("ERROR, something went wrong")
+        # non-zero status: an incomplete download is a failure
+        sys.exit(1)
+
+
+def maybe_download(model_storage_directory, url):
+    """
+    Make sure `model_storage_directory` contains the `model` and `params`
+    files, downloading and unpacking the archive at `url` if one is missing.
+    args:
+        model_storage_directory: folder that must hold `model` and `params`
+        url: address of the tar archive with the inference model
+    Nothing is downloaded when both files already exist. Otherwise the
+    archive is saved inside the folder, every regular file in it whose file
+    name contains "model" or "params" is written to the folder as `model`
+    or `params`, and the archive is removed again.
+    """
+    model_file = os.path.join(model_storage_directory, 'model')
+    params_file = os.path.join(model_storage_directory, 'params')
+    if os.path.exists(model_file) and os.path.exists(params_file):
+        return
+
+    tmp_path = os.path.join(model_storage_directory, url.split('/')[-1])
+    print('download {} to {}'.format(url, tmp_path))
+    os.makedirs(model_storage_directory, exist_ok=True)
+    try:
+        download_with_progressbar(url, tmp_path)
+        with tarfile.open(tmp_path, 'r') as tarObj:
+            for member in tarObj.getmembers():
+                # extractfile() returns None for directories and other
+                # non-file members, so only regular files are considered
+                if not member.isfile():
+                    continue
+                # match on the file name only; a parent directory whose
+                # name contains "model" must not turn `params` into `model`
+                member_name = os.path.basename(member.name)
+                if "model" in member_name:
+                    filename = 'model'
+                elif "params" in member_name:
+                    filename = 'params'
+                else:
+                    continue
+                file = tarObj.extractfile(member)
+                with open(
+                        os.path.join(model_storage_directory, filename),
+                        'wb') as f:
+                    f.write(file.read())
+    finally:
+        # do not leave a partial or broken archive behind on failure
+        if os.path.exists(tmp_path):
+            os.remove(tmp_path)
+
+
+def parse_args():
+    """
+    Parse the paddleocr options from the command line (`sys.argv`).
+    returns:
+        argparse.Namespace with one attribute per option defined below
+    Boolean options take an explicit value: "true", "t" or "1" (case
+    insensitive) mean True, every other value means False.
+    """
+    import argparse
+
+    def str2bool(v):
+        return v.lower() in ("true", "t", "1")
+
+    parser = argparse.ArgumentParser()
+    # params for prediction engine
+    parser.add_argument("--use_gpu", type=str2bool, default=True)
+    parser.add_argument("--ir_optim", type=str2bool, default=True)
+    parser.add_argument("--use_tensorrt", type=str2bool, default=False)
+    parser.add_argument("--gpu_mem", type=int, default=8000)
+
+    # params for text detector
+    parser.add_argument("--image_dir", type=str)
+    parser.add_argument("--det_algorithm", type=str, default='DB')
+    parser.add_argument("--det_model_dir", type=str, default=None)
+    parser.add_argument("--det_max_side_len", type=float, default=960)
+
+    # DB params
+    parser.add_argument("--det_db_thresh", type=float, default=0.3)
+    parser.add_argument("--det_db_box_thresh", type=float, default=0.5)
+    parser.add_argument("--det_db_unclip_ratio", type=float, default=2.0)
+
+    # EAST params
+    parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
+    parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
+    parser.add_argument("--det_east_nms_thresh", type=float, default=0.2)
+
+    # params for text recognizer
+    parser.add_argument("--rec_algorithm", type=str, default='CRNN')
+    parser.add_argument("--rec_model_dir", type=str, default=None)
+    parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
+    parser.add_argument("--rec_char_type", type=str, default='ch')
+    parser.add_argument("--rec_batch_num", type=int, default=30)
+    parser.add_argument("--max_text_length", type=int, default=25)
+    parser.add_argument(
+        "--rec_char_dict_path",
+        type=str,
+        default="./ppocr/utils/ppocr_keys_v1.txt")
+    # str2bool, not bool: bool("false") is True, so `type=bool` would make
+    # it impossible to switch these two options off from the command line
+    parser.add_argument("--use_space_char", type=str2bool, default=True)
+    parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
+
+    # which stages `ocr` runs when used from the command line
+    parser.add_argument("--det", type=str2bool, default=True)
+    parser.add_argument("--rec", type=str2bool, default=True)
+    return parser.parse_args()
+
+
+class PaddleOCR(predict_system.TextSystem):
+    """
+    Text detection and recognition system that fetches the built-in models
+    on first use and then initialises `predict_system.TextSystem` with them.
+    """
+
+    def __init__(self, **kwargs):
+        """
+        paddleocr package
+        args:
+            **kwargs: other params show in paddleocr --help; they override
+                the values parsed from the command line
+        The process exits with status 1 when `det_algorithm` or
+        `rec_algorithm` is not supported.
+        """
+        postprocess_params = parse_args()
+        vars(postprocess_params).update(kwargs)
+
+        # validate the algorithms first so that an unsupported choice fails
+        # before any model archive is downloaded
+        if postprocess_params.det_algorithm not in SUPPORT_DET_MODEL:
+            logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL))
+            sys.exit(1)
+        if postprocess_params.rec_algorithm not in SUPPORT_REC_MODEL:
+            logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
+            sys.exit(1)
+
+        # init model dir: fall back to the built-in model folders
+        if postprocess_params.det_model_dir is None:
+            postprocess_params.det_model_dir = os.path.join(BASE_DIR, 'det')
+        if postprocess_params.rec_model_dir is None:
+            postprocess_params.rec_model_dir = os.path.join(BASE_DIR, 'rec')
+        print(postprocess_params)
+        # download model (skipped when model and params files already exist)
+        maybe_download(postprocess_params.det_model_dir, model_params['det'])
+        maybe_download(postprocess_params.rec_model_dir, model_params['rec'])
+
+        # resolve the dictionary path relative to the folder of this file
+        postprocess_params.rec_char_dict_path = Path(
+            __file__).parent / postprocess_params.rec_char_dict_path
+
+        # init det_model and rec_model
+        super().__init__(postprocess_params)
+
+    def ocr(self, img, det=True, rec=True):
+        """
+        ocr with paddleocr
+        args:
+            img: img for ocr, support ndarray, img_path and list of ndarray
+            det: use text detection or not, if false, only rec will be exec. default is True
+            rec: use text recognition or not, if false, only det will be exec. default is True
+        returns:
+            det and rec: list of [box, recognition result] pairs
+            det only: list of boxes, or None if the detector returns no boxes
+            det is false: the output of the text recognizer, whatever `rec` is
+            None if `img` is a path and the image cannot be loaded
+        """
+        assert isinstance(img, (np.ndarray, list, str))
+        if isinstance(img, str):
+            image_file = img
+            img, flag = check_and_read_gif(image_file)
+            if not flag:
+                # not handled by check_and_read_gif, read it with OpenCV
+                img = cv2.imread(image_file)
+            if img is None:
+                logger.error("error in loading image:{}".format(image_file))
+                return None
+        if det and rec:
+            dt_boxes, rec_res = self(img)
+            return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
+        if det:
+            dt_boxes, elapse = self.text_detector(img)
+            if dt_boxes is None:
+                return None
+            return [box.tolist() for box in dt_boxes]
+        # recognition only: the recognizer is called with a list of images
+        if not isinstance(img, list):
+            img = [img]
+        rec_res, elapse = self.text_recognizer(img)
+        return rec_res
+
+
+def main():
+    """
+    Command line entry point: run OCR on every image found under
+    `--image_dir` and print each image path followed by its results.
+    """
+    args = parse_args()
+    image_file_list = get_image_file_list(args.image_dir)
+    if len(image_file_list) == 0:
+        logger.error('no images find in {}'.format(args.image_dir))
+        return
+    # PaddleOCR() parses the command line itself, so the model options
+    # given by the user are picked up without passing them here
+    ocr_engine = PaddleOCR()
+    for img_path in image_file_list:
+        print(img_path)
+        result = ocr_engine.ocr(img_path, det=args.det, rec=args.rec)
+        if result is None:
+            # ocr() returns None when the image cannot be loaded or the
+            # detector yields no boxes; keep going with the next image
+            continue
+        for line in result:
+            print(line)
--- a/ppocr/data/rec/dataset_traversal.py
+++ b/ppocr/data/rec/dataset_traversal.py
@@ -214,6 +214,8 @@ class SimpleReader(object):
        self.mode = params['mode']
        self.infer_img = params['infer_img']
        self.use_tps = False
+        if "num_heads" in params:
+            self.num_heads = params['num_heads']
        if "tps" in params:
            self.use_tps = True
        self.use_distort = False
@@ -237,7 +239,7 @@ class SimpleReader(object):

        def get_device_num():
            if self.use_gpu:
-                gpus = os.environ.get("CUDA_VISIBLE_DEVICES", 1)
+                gpus = os.environ.get("CUDA_VISIBLE_DEVICES", '1')
                gpu_num = len(gpus.split(','))
                return gpu_num
            else:
@@ -251,6 +253,13 @@ class SimpleReader(object):
                    img = cv2.imread(single_img)
                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+                    if self.loss_type == 'srn':
+                        norm_img = process_image_srn(
+                            img=img,
+                            image_shape=self.image_shape,
+                            num_heads=self.num_heads,
+                            max_text_length=self.max_text_length)
+                    else:
                        norm_img = process_image(
                            img=img,
                            image_shape=self.image_shape,
@@ -286,6 +295,17 @@ class SimpleReader(object):
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

                    label = substr[1]
+                    if self.loss_type == "srn":
+                        outs = process_image_srn(
+                            img=img,
+                            image_shape=self.image_shape,
+                            num_heads=self.num_heads,
+                            max_text_length=self.max_text_length,
+                            label=label,
+                            char_ops=self.char_ops,
+                            loss_type=self.loss_type)
+
+                    else:
                        outs = process_image(
                            img=img,
                            image_shape=self.image_shape,

--- a/ppocr/data/rec/img_tools.py
+++ b/ppocr/data/rec/img_tools.py
@@ -410,7 +410,8 @@ def resize_norm_img_srn(img, image_shape):

 def srn_other_inputs(image_shape,
                     num_heads,
-                     max_text_length):
+                     max_text_length,
+                     char_num):

    imgC, imgH, imgW = image_shape
    feature_dim = int((imgH / 8) * (imgW / 8))
@@ -418,7 +419,7 @@ def srn_other_inputs(image_shape,
    encoder_word_pos = np.array(range(0, feature_dim)).reshape((feature_dim, 1)).astype('int64')
    gsrm_word_pos = np.array(range(0, max_text_length)).reshape((max_text_length, 1)).astype('int64')

-    lbl_weight = np.array([37] * max_text_length).reshape((-1,1)).astype('int64')
+    lbl_weight = np.array([int(char_num-1)] * max_text_length).reshape((-1,1)).astype('int64')

    gsrm_attn_bias_data = np.ones((1, max_text_length, max_text_length)) 
    gsrm_slf_attn_bias1 = np.triu(gsrm_attn_bias_data, 1).reshape([-1, 1, max_text_length, max_text_length])
@@ -441,17 +442,18 @@ def process_image_srn(img,
                      loss_type=None):
    norm_img = resize_norm_img_srn(img, image_shape)
    norm_img = norm_img[np.newaxis, :]
+    char_num = char_ops.get_char_num()
+
    [lbl_weight, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2] = \
-        srn_other_inputs(image_shape, num_heads, max_text_length)
+        srn_other_inputs(image_shape, num_heads, max_text_length,char_num)

    if label is not None:
-        char_num = char_ops.get_char_num()
        text = char_ops.encode(label)
        if len(text) == 0 or len(text) > max_text_length:
            return None
        else:
            if loss_type == "srn":
-                text_padded = [37] * max_text_length
+                text_padded = [int(char_num-1)] * max_text_length
                for i in range(len(text)):
                    text_padded[i] = text[i]
                    lbl_weight[i] = [1.0]

--- a/ppocr/modeling/backbones/rec_resnet50_fpn.py
+++ b/ppocr/modeling/backbones/rec_resnet50_fpn.py
@@ -22,12 +22,12 @@ import paddle
 import paddle.fluid as fluid
 from paddle.fluid.param_attr import ParamAttr

-
-__all__ = ["ResNet", "ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"]
+__all__ = [
+    "ResNet", "ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"
+]

 Trainable = True
-w_nolr = fluid.ParamAttr(
-        trainable = Trainable)
+w_nolr = fluid.ParamAttr(trainable=Trainable)
 train_parameters = {
    "input_size": [3, 224, 224],
    "input_mean": [0.485, 0.456, 0.406],
@@ -40,12 +40,12 @@ train_parameters = {
    }
 }

+
 class ResNet():
    def __init__(self, params):
        self.layers = params['layers']
        self.params = train_parameters

-
    def __call__(self, input):
        layers = self.layers
        supported_layers = [18, 34, 50, 101, 152]
@@ -60,11 +60,16 @@ class ResNet():
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
-        stride_list = [(2,2),(2,2),(1,1),(1,1)]
+        stride_list = [(2, 2), (2, 2), (1, 1), (1, 1)]
        num_filters = [64, 128, 256, 512]

        conv = self.conv_bn_layer(
-            input=input, num_filters=64, filter_size=7, stride=2, act='relu', name="conv1")
+            input=input,
+            num_filters=64,
+            filter_size=7,
+            stride=2,
+            act='relu',
+            name="conv1")
        F = []
        if layers >= 50:
            for block in range(len(depth)):
@@ -79,26 +84,67 @@ class ResNet():
                    conv = self.bottleneck_block(
                        input=conv,
                        num_filters=num_filters[block],
-                        stride=stride_list[block]  if i == 0 else 1, name=conv_name)
+                        stride=stride_list[block] if i == 0 else 1,
+                        name=conv_name)
+                F.append(conv)
+        else:
+            for block in range(len(depth)):
+                for i in range(depth[block]):
+                    conv_name = "res" + str(block + 2) + chr(97 + i)
+
+                    if i == 0 and block != 0:
+                        stride = (2, 1)
+                    else:
+                        stride = (1, 1)
+
+                    conv = self.basic_block(
+                        input=conv,
+                        num_filters=num_filters[block],
+                        stride=stride,
+                        if_first=block == i == 0,
+                        name=conv_name)
                F.append(conv)

        base = F[-1]
        for i in [-2, -3]:
            b, c, w, h = F[i].shape
-            if (w,h) == base.shape[2:]:
+            if (w, h) == base.shape[2:]:
                base = base
            else:
-                base = fluid.layers.conv2d_transpose( input=base, num_filters=c,filter_size=4, stride=2,
-                    padding=1,act=None,
+                base = fluid.layers.conv2d_transpose(
+                    input=base,
+                    num_filters=c,
+                    filter_size=4,
+                    stride=2,
+                    padding=1,
+                    act=None,
                    param_attr=w_nolr,
                    bias_attr=w_nolr)
-                base = fluid.layers.batch_norm(base, act = "relu", param_attr=w_nolr, bias_attr=w_nolr)
+                base = fluid.layers.batch_norm(
+                    base, act="relu", param_attr=w_nolr, bias_attr=w_nolr)
            base = fluid.layers.concat([base, F[i]], axis=1)
-            base = fluid.layers.conv2d(base, num_filters=c, filter_size=1, param_attr=w_nolr, bias_attr=w_nolr)
-            base = fluid.layers.conv2d(base, num_filters=c, filter_size=3,padding = 1, param_attr=w_nolr, bias_attr=w_nolr)
-            base = fluid.layers.batch_norm(base, act = "relu", param_attr=w_nolr, bias_attr=w_nolr)
+            base = fluid.layers.conv2d(
+                base,
+                num_filters=c,
+                filter_size=1,
+                param_attr=w_nolr,
+                bias_attr=w_nolr)
+            base = fluid.layers.conv2d(
+                base,
+                num_filters=c,
+                filter_size=3,
+                padding=1,
+                param_attr=w_nolr,
+                bias_attr=w_nolr)
+            base = fluid.layers.batch_norm(
+                base, act="relu", param_attr=w_nolr, bias_attr=w_nolr)

-        base = fluid.layers.conv2d(base, num_filters=512, filter_size=1,bias_attr=w_nolr,param_attr=w_nolr)
+        base = fluid.layers.conv2d(
+            base,
+            num_filters=512,
+            filter_size=1,
+            bias_attr=w_nolr,
+            param_attr=w_nolr)

        return base

@@ -113,13 +159,14 @@ class ResNet():
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
-            filter_size= 2  if stride==(1,1)  else filter_size,
-            dilation = 2 if stride==(1,1) else 1,
+            filter_size=2 if stride == (1, 1) else filter_size,
+            dilation=2 if stride == (1, 1) else 1,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
-            param_attr=ParamAttr(name=name + "_weights",trainable = Trainable),
+            param_attr=ParamAttr(
+                name=name + "_weights", trainable=Trainable),
            bias_attr=False,
            name=name + '.conv2d.output.1')

@@ -127,18 +174,21 @@ class ResNet():
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
-        return fluid.layers.batch_norm(input=conv,
+        return fluid.layers.batch_norm(
+            input=conv,
            act=act,
            name=bn_name + '.output.1',
-                                       param_attr=ParamAttr(name=bn_name + '_scale',trainable = Trainable),
-                                       bias_attr=ParamAttr(bn_name + '_offset',trainable = Trainable),
+            param_attr=ParamAttr(
+                name=bn_name + '_scale', trainable=Trainable),
+            bias_attr=ParamAttr(
+                bn_name + '_offset', trainable=Trainable),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance', )

    def shortcut(self, input, ch_out, stride, is_first, name):
        ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1 or is_first == True:
-            if stride == (1,1):
+            if stride == (1, 1):
                return self.conv_bn_layer(input, ch_out, 1, 1, name=name)
            else:  #stride == (2,2)
                return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
@@ -148,7 +198,11 @@ class ResNet():

    def bottleneck_block(self, input, num_filters, stride, name):
        conv0 = self.conv_bn_layer(
-            input=input, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a")
+            input=input,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu',
+            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
@@ -157,16 +211,36 @@ class ResNet():
            act='relu',
            name=name + "_branch2b")
        conv2 = self.conv_bn_layer(
-            input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c")
+            input=conv1,
+            num_filters=num_filters * 4,
+            filter_size=1,
+            act=None,
+            name=name + "_branch2c")

-        short = self.shortcut(input, num_filters * 4, stride, is_first=False, name=name + "_branch1")
+        short = self.shortcut(
+            input,
+            num_filters * 4,
+            stride,
+            is_first=False,
+            name=name + "_branch1")

-        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu', name=name + ".add.output.5")
+        return fluid.layers.elementwise_add(
+            x=short, y=conv2, act='relu', name=name + ".add.output.5")

    def basic_block(self, input, num_filters, stride, is_first, name):
-        conv0 = self.conv_bn_layer(input=input, num_filters=num_filters, filter_size=3, act='relu', stride=stride,
+        conv0 = self.conv_bn_layer(
+            input=input,
+            num_filters=num_filters,
+            filter_size=3,
+            act='relu',
+            stride=stride,
            name=name + "_branch2a")
-        conv1 = self.conv_bn_layer(input=conv0, num_filters=num_filters, filter_size=3, act=None,
+        conv1 = self.conv_bn_layer(
+            input=conv0,
+            num_filters=num_filters,
+            filter_size=3,
+            act=None,
            name=name + "_branch2b")
-        short = self.shortcut(input, num_filters, stride, is_first, name=name + "_branch1")
+        short = self.shortcut(
+            input, num_filters, stride, is_first, name=name + "_branch1")
        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
--- a/ppocr/modeling/heads/det_sast_head.py
+++ b/ppocr/modeling/heads/det_sast_head.py
@@ -49,7 +49,7 @@ class SASTHead(object):
        for i in range(4):
            if i == 0:
                g[i] = deconv_bn_layer(input=h[i], num_filters=num_outputs[i + 1], act=None, name='fpn_up_g0')
-                print("g[{}] shape: {}".format(i, g[i].shape))
+                #print("g[{}] shape: {}".format(i, g[i].shape))
            else:
                g[i] = fluid.layers.elementwise_add(x=g[i - 1], y=h[i])
                g[i] = fluid.layers.relu(g[i])
@@ -58,7 +58,7 @@ class SASTHead(object):
                g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i],
                                    filter_size=3, stride=1, act='relu', name='fpn_up_g%d_1'%i)
                g[i] = deconv_bn_layer(input=g[i], num_filters=num_outputs[i + 1], act=None, name='fpn_up_g%d_2'%i)
-                print("g[{}] shape: {}".format(i, g[i].shape))
+                #print("g[{}] shape: {}".format(i, g[i].shape))

        g[4] = fluid.layers.elementwise_add(x=g[3], y=h[4])
        g[4] = fluid.layers.relu(g[4])

--- a/ppocr/optimizer.py
+++ b/ppocr/optimizer.py
@@ -14,14 +14,50 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import math
 import paddle.fluid as fluid
 from paddle.fluid.regularizer import L2Decay
+from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
+import paddle.fluid.layers.ops as ops

 from ppocr.utils.utility import initial_logger

 logger = initial_logger()


+def cosine_decay_with_warmup(learning_rate,
+                             step_each_epoch,
+                             epochs=500,
+                             warmup_minibatch=1000):
+    """Applies cosine decay to the learning rate.
+    warmup: lr = learning_rate * global_step / warmup_minibatch (updated every mini-batch)
+    then:   lr = learning_rate * (cos((global_step - warmup_minibatch) * pi / (epochs * step_each_epoch)) + 1) / 2
+    """
+    global_step = _decay_step_counter()
+    lr = fluid.layers.tensor.create_global_var(
+        shape=[1],
+        value=0.0,
+        dtype='float32',
+        persistable=True,
+        name="learning_rate")
+
+    warmup_minibatch = fluid.layers.fill_constant(
+        shape=[1],
+        dtype='float32',
+        value=float(warmup_minibatch),
+        force_cpu=True)
+
+    with fluid.layers.control_flow.Switch() as switch:
+        with switch.case(global_step < warmup_minibatch):
+            decayed_lr = learning_rate * (1.0 * global_step / warmup_minibatch)
+            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
+        with switch.default():
+            decayed_lr = learning_rate * \
+                (ops.cos((global_step - warmup_minibatch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
+            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
+    return lr
+
+
 def AdamDecay(params, parameter_list=None):
    """
    define optimizer function
@@ -36,7 +72,9 @@ def AdamDecay(params, parameter_list=None):
    l2_decay = params.get("l2_decay", 0.0)

    if 'decay' in params:
-        supported_decay_mode = ["cosine_decay", "piecewise_decay"]
+        supported_decay_mode = [
+            "cosine_decay", "cosine_decay_warmup", "piecewise_decay"
+        ]
        params = params['decay']
        decay_mode = params['function']
        assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
@@ -49,6 +87,15 @@ def AdamDecay(params, parameter_list=None):
                learning_rate=base_lr,
                step_each_epoch=step_each_epoch,
                epochs=total_epoch)
+        elif decay_mode == "cosine_decay_warmup":
+            step_each_epoch = params['step_each_epoch']
+            total_epoch = params['total_epoch']
+            warmup_minibatch = params.get("warmup_minibatch", 1000)
+            base_lr = cosine_decay_with_warmup(
+                learning_rate=base_lr,
+                step_each_epoch=step_each_epoch,
+                epochs=total_epoch,
+                warmup_minibatch=warmup_minibatch)
        elif decay_mode == "piecewise_decay":
            boundaries = params["boundaries"]
            decay_rate = params["decay_rate"]

--- a/ppocr/postprocess/east_postprocess.py
+++ b/ppocr/postprocess/east_postprocess.py
@@ -22,9 +22,9 @@ import cv2

 import os
 import sys
-__dir__ = os.path.dirname(__file__)
+__dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
-sys.path.append(os.path.join(__dir__, '..'))
+sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))


 class EASTPostPocess(object):

--- a/ppocr/postprocess/lanms/.ycm_extra_conf.py
+++ b/ppocr/postprocess/lanms/.ycm_extra_conf.py
@@ -25,7 +25,7 @@ import ycm_core
 # These are the compilation flags that will be used in case there's no
 # compilation database set (by default, one is not set).
 # CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
-sys.path.append(os.path.dirname(__file__))
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))


 BASE_DIR = os.path.dirname(os.path.realpath(__file__))

--- a/ppocr/utils/character.py
+++ b/ppocr/utils/character.py
@@ -26,8 +26,6 @@ class CharacterOps(object):
        self.character_type = config['character_type']
        self.loss_type = config['loss_type']
        self.max_text_len = config['max_text_length']
-        if self.loss_type == "srn" and self.character_type != "en":
-            raise Exception("SRN can only support in character_type == en")
        if self.character_type == "en":
            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
            dict_character = list(self.character_str)
@@ -160,13 +158,15 @@ def cal_predicts_accuracy_srn(char_ops,
    acc_num = 0
    img_num = 0

+    char_num = char_ops.get_char_num()
+
    total_len = preds.shape[0]
    img_num = int(total_len / max_text_len)
    for i in range(img_num):
        cur_label = []
        cur_pred = []
        for j in range(max_text_len):
-            if labels[j + i * max_text_len] != 37:  #0
+            if labels[j + i * max_text_len] != int(char_num-1):  # index char_num - 1 ends the label (was hard-coded 37)
                cur_label.append(labels[j + i * max_text_len][0])
            else:
                break
@@ -178,7 +178,7 @@ def cal_predicts_accuracy_srn(char_ops,
            elif j == len(cur_label) and j == max_text_len:
                acc_num += 1
                break
-            elif j == len(cur_label) and preds[j + i * max_text_len][0] == 37:
+            elif j == len(cur_label) and preds[j + i * max_text_len][0] == int(char_num-1):
                acc_num += 1
                break
    acc = acc_num * 1.0 / img_num

--- a/requirments.txt
+++ b/requirments.txt
@@ -2,3 +2,5 @@ shapely
 imgaug
 pyclipper
 lmdb
+tqdm
+numpy
\ No newline at end of file
--- a/setup.py
+++ b/setup.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from setuptools import setup
+from io import open
+
+with open('requirments.txt', encoding="utf-8-sig") as f:
+    requirements = f.readlines()
+    requirements.append('tqdm')
+
+
+def readme():
+    with open('doc/doc_en/whl_en.md', encoding="utf-8-sig") as f:
+        README = f.read()
+    return README
+
+
+setup(
+    name='paddleocr',
+    packages=['paddleocr'],
+    package_dir={'paddleocr': ''},
+    include_package_data=True,
+    entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
+    version='0.0.3',
+    install_requires=requirements,
+    license='Apache License 2.0',
+    description='Awesome OCR toolkits based on PaddlePaddle （8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',
+    long_description=readme(),
+    long_description_content_type='text/markdown',
+    url='https://github.com/PaddlePaddle/PaddleOCR',
+    download_url='https://github.com/PaddlePaddle/PaddleOCR.git',
+    keywords=[
+        'ocr textdetection textrecognition paddleocr crnn east star-net rosetta ocrlite db chineseocr chinesetextdetection chinesetextrecognition'
+    ],
+    classifiers=[
+        'Intended Audience :: Developers', 'Operating System :: OS Independent',
+        'Natural Language :: Chinese (Simplified)',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.2',
+        'Programming Language :: Python :: 3.3',
+        'Programming Language :: Python :: 3.4',
+        'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7', 'Topic :: Utilities'
+    ], )
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -18,9 +18,9 @@ from __future__ import print_function

 import os
 import sys
-__dir__ = os.path.dirname(__file__)
+__dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
-sys.path.append(os.path.join(__dir__, '..'))
+sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))


 def set_paddle_flags(**kwargs):