Merge branch 'develop' into develop

1a0848a4 · shaohua.zhang · GitHub · fce0a57d · f2ae24a9 · 1a0848a4
Unverified Commit 1a0848a4 authored Jul 30, 2020 by shaohua.zhang Committed by GitHub Jul 30, 2020
11 changed files
--- a/README.md
+++ b/README.md
@@ -212,3 +212,4 @@ We welcome all the contributions to PaddleOCR and appreciate for your feedback v
 - Many thanks to [lyl120117](https://github.com/lyl120117) for contributing the code for printing the network structure.
 - Thanks [xiangyubo](https://github.com/xiangyubo) for contributing the handwritten Chinese OCR datasets.
 - Thanks [authorfu](https://github.com/authorfu) for contributing Android demo  and [xiadeye](https://github.com/xiadeye) contributing iOS demo, respectively.
+- Thanks [BeyondYourself](https://github.com/BeyondYourself) for contributing many great suggestions and simplifying part of the code style.
--- a/README_cn.md
+++ b/README_cn.md
@@ -32,7 +32,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力
 上图是超轻量级中文OCR模型效果展示，更多效果图请见[效果展示页面](./doc/doc_ch/visualization.md)。
 - 超轻量级中文OCR在线体验地址：https://www.paddlepaddle.org.cn/hub/scene/ocr
- 移动端DEMO体验(基于EasyEdge和Paddle-Lite, 支持iOS和Android系统)：[安装包二维码获取地址](https://ai.baidu.com/easyedge/app/openSource?from=paddlelite) 
+- 移动端DEMO体验(基于EasyEdge和Paddle-Lite, 支持iOS和Android系统)：[安装包二维码获取地址](https://ai.baidu.com/easyedge/app/openSource?from=paddlelite)
   Android手机也可以扫描下面二维码安装体验。
@@ -205,8 +205,9 @@ PaddleOCR文本识别算法的训练和使用请参考文档教程中[模型训
 ## 贡献代码
 我们非常欢迎你为PaddleOCR贡献代码，也十分感谢你的反馈。
- 非常感谢 [Khanh Tran](https://github.com/xxxpsyduck) 贡献了英文文档。
+- 非常感谢 [Khanh Tran](https://github.com/xxxpsyduck) 贡献了英文文档
 - 非常感谢 [zhangxin](https://github.com/ZhangXinNan)([Blog](https://blog.csdn.net/sdlypyzq)) 贡献新的可视化方式、添加.gitignore、处理手动设置PYTHONPATH环境变量的问题
 - 非常感谢 [lyl120117](https://github.com/lyl120117) 贡献打印网络结构的代码
 - 非常感谢 [xiangyubo](https://github.com/xiangyubo) 贡献手写中文OCR数据集
 - 非常感谢 [authorfu](https://github.com/authorfu) 贡献Android和[xiadeye](https://github.com/xiadeye) 贡献IOS的demo代码
+- 非常感谢 [BeyondYourself](https://github.com/BeyondYourself) 给PaddleOCR提了很多非常棒的建议，并简化了PaddleOCR的部分代码风格。
--- a/deploy/android_demo/README.md
+++ b/deploy/android_demo/README.md
 # 如何快速测试
 ### 1. 安装最新版本的Android Studio
-可以从https://developer.android.com/studio下载。本Demo使用是4.0版本Android Studio编写。
+可以从https://developer.android.com/studio 下载。本Demo是使用4.0版本Android Studio编写的。
 ### 2. 安装NDK 20 以上版本
 Demo测试的时候使用的是NDK 20b版本，20版本以上均可以支持编译成功。

--- a/doc/doc_ch/installation.md
+++ b/doc/doc_ch/installation.md
@@ -7,7 +7,7 @@ PaddleOCR 工作环境
 - glibc 2.23
 - cuDNN 7.6+ (GPU)
-建议使用我们提供的docker运行PaddleOCR，有关docker使用请参考[链接](https://docs.docker.com/get-started/)。
+建议使用我们提供的docker运行PaddleOCR，有关docker、nvidia-docker使用请参考[链接](https://docs.docker.com/get-started/)。
 *如您希望使用 mac 或 windows直接运行预测代码，可以从第2步开始执行。*

--- a/doc/doc_ch/recognition.md
+++ b/doc/doc_ch/recognition.md
@@ -21,12 +21,11 @@ ln -sf <path/to/dataset> <path/to/paddle_ocr>/train_data/dataset
 * 使用自己数据集：
 若您希望使用自己的数据进行训练，请参考下文组织您的数据。
 - 训练集
 首先请将训练图片放入同一个文件夹（train_images），并用一个txt文件（rec_gt_train.txt）记录图片路径和标签。
-* 注意： 默认请将图片路径和图片标签用 \t 分割，如用其他方式分割将造成训练报错
+**注意：** 默认请将图片路径和图片标签用 \t 分割，如用其他方式分割将造成训练报错
 ```
 " 图像文件名                 图像标注信息 "
@@ -41,12 +40,9 @@ PaddleOCR 提供了一份用于训练 icdar2015 数据集的标签文件，通
 wget -P ./train_data/ic15_data  https://paddleocr.bj.bcebos.com/dataset/rec_gt_train.txt
 # 测试集标签
 wget -P ./train_data/ic15_data  https://paddleocr.bj.bcebos.com/dataset/rec_gt_test.txt
 ```
 最终训练集应有如下文件结构：
 ```
 |-train_data
    |-ic15_data
@@ -150,7 +146,7 @@ PaddleOCR支持训练和评估交替进行, 可以在 `configs/rec/rec_icdar15_t
 如果验证集很大，测试将会比较耗时，建议减少评估次数，或训练完再进行评估。
-* 提示： 可通过 -c 参数选择 `configs/rec/` 路径下的多种模型配置进行训练，PaddleOCR支持的识别算法有：
+**提示：** 可通过 -c 参数选择 `configs/rec/` 路径下的多种模型配置进行训练，PaddleOCR支持的识别算法有：
 | 配置文件 |  算法名称 |   backbone |   trans   |   seq      |     pred     |

--- a/ppocr/data/det/db_process.py
+++ b/ppocr/data/det/db_process.py
@@ -17,7 +17,7 @@ import cv2
 import numpy as np
 import json
 import sys
-from ppocr.utils.utility import initial_logger
+from ppocr.utils.utility import initial_logger, check_and_read_gif
 logger = initial_logger()
 from .data_augment import AugmentData
@@ -100,7 +100,9 @@ class DBProcessTrain(object):
    def __call__(self, label_infor):
        img_path, gt_label = self.convert_label_infor(label_infor)
-        imgvalue = cv2.imread(img_path)
+        imgvalue, flag = check_and_read_gif(img_path)
+        if not flag:
+            imgvalue = cv2.imread(img_path)
        if imgvalue is None:
            logger.info("{} does not exist!".format(img_path))
            return None

--- a/ppocr/data/rec/dataset_traversal.py
+++ b/ppocr/data/rec/dataset_traversal.py
@@ -233,7 +233,7 @@ class SimpleReader(object):
                img_num = len(label_infor_list)
                img_id_list = list(range(img_num))
                random.shuffle(img_id_list)
-                if sys.platform == "win32":
+                if sys.platform == "win32" and self.num_workers != 1:
                    print("multiprocess is not fully compatible with Windows."
                          "num_workers will be 1.")
                    self.num_workers = 1

--- a/ppocr/utils/utility.py
+++ b/ppocr/utils/utility.py
@@ -15,6 +15,8 @@
 import logging
 import os
 import imghdr
+import cv2
+from paddle import fluid
 def initial_logger():
@@ -62,7 +64,7 @@ def get_image_file_list(img_file):
    if img_file is None or not os.path.exists(img_file):
        raise Exception("not found any img file in {}".format(img_file))
-    img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff'}
+    img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif', 'GIF'}
    if os.path.isfile(img_file) and imghdr.what(img_file) in img_end:
        imgs_lists.append(img_file)
    elif os.path.isdir(img_file):
@@ -75,7 +77,18 @@ def get_image_file_list(img_file):
    return imgs_lists
-from paddle import fluid
+def check_and_read_gif(img_path):
+    """Read the first frame of a GIF file.
+
+    Args:
+        img_path: Path of the image. It is treated as a GIF when the last
+            three characters of its basename are 'gif' or 'GIF'.
+
+    Returns:
+        A tuple (imgvalue, flag). flag is True and imgvalue holds the first
+        frame with its channel axis reversed when the frame was read;
+        otherwise (None, False) is returned, i.e. when the path is not a
+        GIF or the frame could not be read.
+    """
+    if os.path.basename(img_path)[-3:] in ['gif', 'GIF']:
+        gif = cv2.VideoCapture(img_path)
+        try:
+            ret, frame = gif.read()
+        finally:
+            # Only the first frame is needed; free the capture handle even
+            # if decoding raises.
+            gif.release()
+        if not ret:
+            # Include the offending path so the log entry is actionable.
+            logging.info("Cannot read {}. This gif image maybe corrupted."
+                         .format(img_path))
+            return None, False
+        if len(frame.shape) == 2 or frame.shape[-1] == 1:
+            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
+        # NOTE(review): the channel axis is reversed here, so the channel
+        # order presumably differs from what a plain cv2.imread returns for
+        # other formats -- confirm this is what the callers expect.
+        imgvalue = frame[:, :, ::-1]
+        return imgvalue, True
+    return None, False
 def create_multi_devices_program(program, loss_var_name):

--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -20,7 +20,7 @@ sys.path.append(os.path.join(__dir__, '../..'))
 import tools.infer.utility as utility
 from ppocr.utils.utility import initial_logger
 logger = initial_logger()
-from ppocr.utils.utility import get_image_file_list
+from ppocr.utils.utility import get_image_file_list, check_and_read_gif
 import cv2
 from ppocr.data.det.east_process import EASTProcessTest
 from ppocr.data.det.db_process import DBProcessTest
@@ -139,7 +139,9 @@ if __name__ == "__main__":
    if not os.path.exists(draw_img_save):
        os.makedirs(draw_img_save)
    for image_file in image_file_list:
-        img = cv2.imread(image_file)
+        img, flag = check_and_read_gif(image_file)
+        if not flag:
+            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue

--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -20,7 +20,7 @@ sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
 import tools.infer.utility as utility
 from ppocr.utils.utility import initial_logger
 logger = initial_logger()
-from ppocr.utils.utility import get_image_file_list
+from ppocr.utils.utility import get_image_file_list, check_and_read_gif
 import cv2
 import copy
 import numpy as np
@@ -153,7 +153,9 @@ def main(args):
    valid_image_file_list = []
    img_list = []
    for image_file in image_file_list:
-        img = cv2.imread(image_file, cv2.IMREAD_COLOR)
+        img, flag = check_and_read_gif(image_file)
+        if not flag:
+            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue

--- a/tools/infer/predict_system.py
+++ b/tools/infer/predict_system.py
@@ -27,7 +27,7 @@ import copy
 import numpy as np
 import math
 import time
-from ppocr.utils.utility import get_image_file_list
+from ppocr.utils.utility import get_image_file_list, check_and_read_gif
 from PIL import Image
 from tools.infer.utility import draw_ocr
 from tools.infer.utility import draw_ocr_box_txt
@@ -49,18 +49,23 @@ class TextSystem(object):
        points[:, 0] = points[:, 0] - left
        points[:, 1] = points[:, 1] - top
        '''
-        img_crop_width = int(max(np.linalg.norm(points[0] - points[1]),
+        img_crop_width = int(
-                                 np.linalg.norm(points[2] - points[3])))
+            max(
-        img_crop_height = int(max(np.linalg.norm(points[0] - points[3]),
+                np.linalg.norm(points[0] - points[1]),
-                                  np.linalg.norm(points[1] - points[2])))
+                np.linalg.norm(points[2] - points[3])))
-        pts_std = np.float32([[0, 0],
+        img_crop_height = int(
-                              [img_crop_width, 0],
+            max(
+                np.linalg.norm(points[0] - points[3]),
+                np.linalg.norm(points[1] - points[2])))
+        pts_std = np.float32([[0, 0], [img_crop_width, 0],
                              [img_crop_width, img_crop_height],
                              [0, img_crop_height]])
        M = cv2.getPerspectiveTransform(points, pts_std)
-        dst_img = cv2.warpPerspective(img, M, (img_crop_width, img_crop_height),
+        dst_img = cv2.warpPerspective(
-                                      borderMode=cv2.BORDER_REPLICATE,
+            img,
-                                      flags=cv2.INTER_CUBIC)
+            M, (img_crop_width, img_crop_height),
+            borderMode=cv2.BORDER_REPLICATE,
+            flags=cv2.INTER_CUBIC)
        dst_img_height, dst_img_width = dst_img.shape[0:2]
        if dst_img_height * 1.0 / dst_img_width >= 1.5:
            dst_img = np.rot90(dst_img)
@@ -119,25 +124,27 @@ def main(args):
    is_visualize = True
    tackle_img_num = 0
    for image_file in image_file_list:
-        img = cv2.imread(image_file)
+        img, flag = check_and_read_gif(image_file)
+        if not flag:
+            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        starttime = time.time()
-        tackle_img_num += 1	
+        tackle_img_num += 1
-        if not args.use_gpu and args.enable_mkldnn and tackle_img_num % 30 == 0:	
+        if not args.use_gpu and args.enable_mkldnn and tackle_img_num % 30 == 0:
            text_sys = TextSystem(args)
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - starttime
        print("Predict time of %s: %.3fs" % (image_file, elapse))
+        drop_score = 0.5
        dt_num = len(dt_boxes)
-        dt_boxes_final = []
        for dno in range(dt_num):
            text, score = rec_res[dno]
-            if score >= 0.5:
+            if score >= drop_score:
                text_str = "%s, %.3f" % (text, score)
                print(text_str)
-                dt_boxes_final.append(dt_boxes[dno])
        if is_visualize:
            image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
@@ -146,7 +153,12 @@ def main(args):
            scores = [rec_res[i][1] for i in range(len(rec_res))]
            draw_img = draw_ocr(
-                image, boxes, txts, scores, draw_txt=True, drop_score=0.5)
+                image,
+                boxes,
+                txts,
+                scores,
+                draw_txt=True,
+                drop_score=drop_score)
            draw_img_save = "./inference_results/"
            if not os.path.exists(draw_img_save):
                os.makedirs(draw_img_save)