Merge pull request #2 from PaddlePaddle/develop

mergepaddleocr

Merge pull request #2 from PaddlePaddle/develop
mergepaddleocr
c1d19ce2 · zhoujun · GitHub · 56c6c3ae · bad9f6cd · c1d19ce2
Unverified Commit c1d19ce2 authored Aug 13, 2020 by zhoujun Committed by GitHub Aug 13, 2020
20 changed files
--- a/doc/imgs_words/en/word_5.png
+++ b/doc/imgs_words/en/word_5.png
--- a/doc/imgs_words_en/.DS_Store
+++ b/doc/imgs_words_en/.DS_Store
--- a/doc/imgs_words_en/word_10.png
+++ b/doc/imgs_words_en/word_10.png
--- a/doc/imgs_words_en/word_116.png
+++ b/doc/imgs_words_en/word_116.png
--- a/doc/imgs_words_en/word_19.png
+++ b/doc/imgs_words_en/word_19.png
--- a/doc/imgs_words_en/word_201.png
+++ b/doc/imgs_words_en/word_201.png
--- a/doc/imgs_words_en/word_308.png
+++ b/doc/imgs_words_en/word_308.png
--- a/doc/imgs_words_en/word_336.png
+++ b/doc/imgs_words_en/word_336.png
--- a/doc/imgs_words_en/word_401.png
+++ b/doc/imgs_words_en/word_401.png
--- a/doc/imgs_words_en/word_461.png
+++ b/doc/imgs_words_en/word_461.png
--- a/doc/imgs_words_en/word_52.png
+++ b/doc/imgs_words_en/word_52.png
--- a/doc/imgs_words_en/word_545.png
+++ b/doc/imgs_words_en/word_545.png
--- a/doc/joinus.jpg
+++ b/doc/joinus.jpg
--- a/doc/ocr-android-easyedge.png
+++ b/doc/ocr-android-easyedge.png
--- a/doc/simfang.ttf
+++ b/doc/simfang.ttf
--- a/doc/tricks/long_text_examples.jpg
+++ b/doc/tricks/long_text_examples.jpg
--- a/ppocr/data/det/dataset_traversal.py
+++ b/ppocr/data/det/dataset_traversal.py
@@ -13,6 +13,7 @@
 #limitations under the License.

 import os
+import sys
 import math
 import random
 import functools
@@ -22,6 +23,7 @@ import string
 from ppocr.utils.utility import initial_logger
 logger = initial_logger()
 from ppocr.utils.utility import create_module
+from ppocr.utils.utility import get_image_file_list
 import time


@@ -34,12 +36,16 @@ class TrainReader(object):
            "absence process_function in Reader"
        self.process = create_module(params['process_function'])(params)

-    def __call__(self, process_id):
+    def __call__(self, process_id):     
+        with open(self.label_file_path, "rb") as fin:
+            label_infor_list = fin.readlines()
+        img_num = len(label_infor_list)
+        img_id_list = list(range(img_num))
+        if sys.platform == "win32" and self.num_workers != 1:
+            print("multiprocess is not fully compatible with Windows."
+                  "num_workers will be 1.")
+            self.num_workers = 1
        def sample_iter_reader():
-            with open(self.label_file_path, "rb") as fin:
-                label_infor_list = fin.readlines()
-            img_num = len(label_infor_list)
-            img_id_list = list(range(img_num))
            random.shuffle(img_id_list)
            for img_id in range(process_id, img_num, self.num_workers):
                label_infor = label_infor_list[img_id_list[img_id]]
@@ -55,8 +61,6 @@ class TrainReader(object):
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
-            if len(batch_outs) != 0:
-                yield batch_outs

        return batch_iter_reader

@@ -72,34 +76,31 @@ class EvalTestReader(object):
            self.params)
        batch_size = self.params['test_batch_size_per_card']

-        flag_test_single_img = False
-        if mode == "test":
-            single_img_path = self.params['single_img_path']
-            if single_img_path is not None:
-                flag_test_single_img = True
-
        img_list = []
-        if flag_test_single_img:
-            img_list.append([single_img_path, single_img_path])
-        else:
+        if mode != "test":
            img_set_dir = self.params['img_set_dir']
            img_name_list_path = self.params['label_file_path']
            with open(img_name_list_path, "rb") as fin:
                lines = fin.readlines()
                for line in lines:
                    img_name = line.decode().strip("\n").split("\t")[0]
-                    img_path = img_set_dir + "/" + img_name
-                    img_list.append([img_path, img_name])
+                    img_path = os.path.join(img_set_dir, img_name)
+                    img_list.append(img_path)
+        else:
+            img_path = self.params['infer_img']
+            img_list = get_image_file_list(img_path)

        def batch_iter_reader():
            batch_outs = []
-            for img_path, img_name in img_list:
+            for img_path in img_list:
                img = cv2.imread(img_path)
                if img is None:
-                    logger.info("load image error:" + img_path)
+                    logger.info("{} does not exist!".format(img_path))
                    continue
+                elif len(list(img.shape)) == 2 or img.shape[2] == 1:
+                    img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                outs = process_function(img)
-                outs.append(img_name)
+                outs.append(img_path)
                batch_outs.append(outs)
                if len(batch_outs) == batch_size:
                    yield batch_outs

--- a/ppocr/data/det/db_process.py
+++ b/ppocr/data/det/db_process.py
@@ -17,6 +17,8 @@ import cv2
 import numpy as np
 import json
 import sys
+from ppocr.utils.utility import initial_logger, check_and_read_gif
+logger = initial_logger()

 from .data_augment import AugmentData
 from .random_crop_data import RandomCropData
@@ -25,6 +27,10 @@ from .make_border_map import MakeBorderMap


 class DBProcessTrain(object):
+    """
+    DB pre-process for Train mode
+    """
+
    def __init__(self, params):
        self.img_set_dir = params['img_set_dir']
        self.image_shape = params['image_shape']
@@ -94,9 +100,14 @@ class DBProcessTrain(object):

    def __call__(self, label_infor):
        img_path, gt_label = self.convert_label_infor(label_infor)
-        imgvalue = cv2.imread(img_path)
+        imgvalue, flag = check_and_read_gif(img_path)
+        if not flag:
+            imgvalue = cv2.imread(img_path)
        if imgvalue is None:
+            logger.info("{} does not exist!".format(img_path))
            return None
+        if len(list(imgvalue.shape)) == 2 or imgvalue.shape[2] == 1:
+            imgvalue = cv2.cvtColor(imgvalue, cv2.COLOR_GRAY2BGR)
        data = self.make_data_dict(imgvalue, gt_label)
        data = AugmentData(data)
        data = RandomCropData(data, self.image_shape[1:])
@@ -109,11 +120,15 @@ class DBProcessTrain(object):


 class DBProcessTest(object):
+    """
+    DB pre-process for Test mode
+    """
+
    def __init__(self, params):
        super(DBProcessTest, self).__init__()
        self.resize_type = 0
-        if 'det_image_shape' in params:
-            self.image_shape = params['det_image_shape']
+        if 'test_image_shape' in params:
+            self.image_shape = params['test_image_shape']
            # print(self.image_shape)
            self.resize_type = 1
        if 'max_side_len' in params:
@@ -124,9 +139,10 @@ class DBProcessTest(object):
    def resize_image_type0(self, im):
        """
        resize image to a size multiple of 32 which is required by the network
-        :param im: the resized image
-        :param max_side_len: limit of max image size to avoid out of memory in gpu
-        :return: the resized image and the resize ratio
+        args:
+            img(array): array with shape [h, w, c]
+        return(tuple):
+            img, (ratio_h, ratio_w)
        """
        max_side_len = self.max_side_len
        h, w, _ = im.shape
@@ -146,12 +162,16 @@ class DBProcessTest(object):
        resize_w = int(resize_w * ratio)
        if resize_h % 32 == 0:
            resize_h = resize_h
+        elif resize_h // 32 <= 1:
+            resize_h = 32
        else:
-            resize_h = (resize_h // 32 + 1) * 32
+            resize_h = (resize_h // 32 - 1) * 32
        if resize_w % 32 == 0:
            resize_w = resize_w
+        elif resize_w // 32 <= 1:
+            resize_w = 32
        else:
-            resize_w = (resize_w // 32 + 1) * 32
+            resize_w = (resize_w // 32 - 1) * 32
        try:
            if int(resize_w) <= 0 or int(resize_h) <= 0:
                return None, (None, None)
@@ -176,8 +196,12 @@ class DBProcessTest(object):
        img_std = [0.229, 0.224, 0.225]
        im = im.astype(np.float32, copy=False)
        im = im / 255
-        im -= img_mean
-        im /= img_std
+        im[:, :, 0] -= img_mean[0]
+        im[:, :, 1] -= img_mean[1]
+        im[:, :, 2] -= img_mean[2]
+        im[:, :, 0] /= img_std[0]
+        im[:, :, 1] /= img_std[1]
+        im[:, :, 2] /= img_std[2]
        channel_swap = (2, 0, 1)
        im = im.transpose(channel_swap)
        return im

--- a/ppocr/data/det/east_process.py
+++ b/ppocr/data/det/east_process.py
@@ -16,7 +16,8 @@ import math
 import cv2
 import numpy as np
 import json
-
+import sys
+import os

 class EASTProcessTrain(object):
    def __init__(self, params):
@@ -52,7 +53,7 @@ class EASTProcessTrain(object):
        label_infor = label_infor.decode()
        label_infor = label_infor.encode('utf-8').decode('utf-8-sig')
        substr = label_infor.strip("\n").split("\t")
-        img_path = self.img_set_dir + substr[0]
+        img_path = os.path.join(self.img_set_dir, substr[0])
        label = json.loads(substr[1])
        nBox = len(label)
        wordBBs, txts, txt_tags = [], [], []
@@ -78,7 +79,7 @@ class EASTProcessTrain(object):
        dst_polys = []
        rand_degree_ratio = np.random.rand()
        rand_degree_cnt = 1
-        if rand_degree_ratio > 0.333 and rand_degree_ratio < 0.666:
+        if 0.333 < rand_degree_ratio < 0.666:
            rand_degree_cnt = 2
        elif rand_degree_ratio > 0.666:
            rand_degree_cnt = 3
@@ -138,7 +139,7 @@ class EASTProcessTrain(object):
                continue
            if p_area > 0:
                #'poly in wrong direction'
-                if tag == False:
+                if not tag:
                    tag = True  #reversed cases should be ignore
                poly = poly[(0, 3, 2, 1), :]
            validated_polys.append(poly)
@@ -455,17 +456,23 @@ class EASTProcessTrain(object):
 class EASTProcessTest(object):
    def __init__(self, params):
        super(EASTProcessTest, self).__init__()
+        self.resize_type = 0
+        if 'test_image_shape' in params:
+            self.image_shape = params['test_image_shape']
+            # print(self.image_shape)
+            self.resize_type = 1
        if 'max_side_len' in params:
            self.max_side_len = params['max_side_len']
        else:
            self.max_side_len = 2400

-    def resize_image(self, im):
+    def resize_image_type0(self, im):
        """
        resize image to a size multiple of 32 which is required by the network
-        :param im: the resized image
-        :param max_side_len: limit of max image size to avoid out of memory in gpu
-        :return: the resized image and the resize ratio
+        args:
+            img(array): array with shape [h, w, c]
+        return(tuple):
+            img, (ratio_h, ratio_w)
        """
        max_side_len = self.max_side_len
        h, w, _ = im.shape
@@ -485,19 +492,40 @@ class EASTProcessTest(object):
        resize_w = int(resize_w * ratio)
        if resize_h % 32 == 0:
            resize_h = resize_h
+        elif resize_h // 32 <= 1:
+            resize_h = 32
        else:
            resize_h = (resize_h // 32 - 1) * 32
        if resize_w % 32 == 0:
            resize_w = resize_w
+        elif resize_w // 32 <= 1:
+            resize_w = 32
        else:
            resize_w = (resize_w // 32 - 1) * 32
-        im = cv2.resize(im, (int(resize_w), int(resize_h)))
+        try:
+            if int(resize_w) <= 0 or int(resize_h) <= 0:
+                return None, (None, None)
+            im = cv2.resize(im, (int(resize_w), int(resize_h)))
+        except:
+            print(im.shape, resize_w, resize_h)
+            sys.exit(0)
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return im, (ratio_h, ratio_w)

+    def resize_image_type1(self, im):
+        resize_h, resize_w = self.image_shape
+        ori_h, ori_w = im.shape[:2]  # (h, w, c)
+        im = cv2.resize(im, (int(resize_w), int(resize_h)))
+        ratio_h = float(resize_h) / ori_h
+        ratio_w = float(resize_w) / ori_w
+        return im, (ratio_h, ratio_w)
+
    def __call__(self, im):
-        im, (ratio_h, ratio_w) = self.resize_image(im)
+        if self.resize_type == 0:
+            im, (ratio_h, ratio_w) = self.resize_image_type0(im)
+        else:
+            im, (ratio_h, ratio_w) = self.resize_image_type1(im)
        img_mean = [0.485, 0.456, 0.406]
        img_std = [0.229, 0.224, 0.225]
        im = im[:, :, ::-1].astype(np.float32)

--- a/ppocr/data/reader_main.py
+++ b/ppocr/data/reader_main.py
@@ -66,6 +66,8 @@ def reader_main(config=None, mode=None):
    reader_function = params['reader_function']
    function = create_module(reader_function)(params)
    if mode == "train":
+        if sys.platform == "win32":
+            return function(0)
        readers = []
        num_workers = params['num_workers']
        for process_id in range(num_workers):
@@ -73,9 +75,3 @@ def reader_main(config=None, mode=None):
        return paddle.reader.multiprocess_reader(readers, False)
    else:
        return function(mode)
-
-
-def test_reader(image_shape, img_path):
-    img = cv2.imread(img_path)
-    norm_img = process_image(img, image_shape)
-    return norm_img