Merge remote-tracking branch 'upstream/dygraph' into dy3

78d51971 · Leif · bd314018 · c683a181 · bd314018 · bd314018
Commit 78d51971 authored Dec 16, 2020 by Leif
15 changed files
--- a/doc/imgs_results_vis2/3.jpg
+++ b/doc/imgs_results_vis2/3.jpg
--- a/doc/imgs_results_vis2/4.jpg
+++ b/doc/imgs_results_vis2/4.jpg
--- a/doc/imgs_results_vis2/5.jpg
+++ b/doc/imgs_results_vis2/5.jpg
--- a/doc/imgs_results_vis2/6.jpg
+++ b/doc/imgs_results_vis2/6.jpg
--- a/doc/imgs_results_vis2/7.jpg
+++ b/doc/imgs_results_vis2/7.jpg
--- a/doc/imgs_results_vis2/8.jpg
+++ b/doc/imgs_results_vis2/8.jpg
--- a/doc/imgs_results_vis2/9.jpg
+++ b/doc/imgs_results_vis2/9.jpg
--- a/doc/imgs_words_en/.DS_Store
+++ b/doc/imgs_words_en/.DS_Store
--- a/ppocr/modeling/transforms/tps.py
+++ b/ppocr/modeling/transforms/tps.py
@@ -128,7 +128,7 @@ class LocalizationNetwork(nn.Layer):
        i = 0
        for block in self.block_list:
            x = block(x)
-        x = x.reshape([B, -1])
+        x = x.squeeze(axis=2).squeeze(axis=2)
        x = self.fc1(x)
        x = F.relu(x)
@@ -176,14 +176,14 @@ class GridGenerator(nn.Layer):
        Return:
            batch_P_prime: the grid for the grid_sampler
        """
-        C = self.build_C()
+        C = self.build_C_paddle()
-        P = self.build_P(I_r_size)
+        P = self.build_P_paddle(I_r_size)
-        inv_delta_C = self.build_inv_delta_C(C).astype('float32')
-        P_hat = self.build_P_hat(C, P).astype('float32')
+        inv_delta_C_tensor = self.build_inv_delta_C_paddle(C).astype('float32')
+        P_hat_tensor = self.build_P_hat_paddle(
+            C, paddle.to_tensor(P)).astype('float32')
-        inv_delta_C_tensor = paddle.to_tensor(inv_delta_C)
        inv_delta_C_tensor.stop_gradient = True
-        P_hat_tensor = paddle.to_tensor(P_hat)
        P_hat_tensor.stop_gradient = True
        batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
@@ -196,71 +196,80 @@ class GridGenerator(nn.Layer):
        batch_P_prime = paddle.matmul(P_hat_tensor, batch_T)
        return batch_P_prime
-    def build_C(self):
+    def build_C_paddle(self):
        """ Return coordinates of fiducial points in I_r; C """
        F = self.F
-        ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
+        ctrl_pts_x = paddle.linspace(-1.0, 1.0, int(F / 2))
-        ctrl_pts_y_top = -1 * np.ones(int(F / 2))
+        ctrl_pts_y_top = -1 * paddle.ones([int(F / 2)])
-        ctrl_pts_y_bottom = np.ones(int(F / 2))
+        ctrl_pts_y_bottom = paddle.ones([int(F / 2)])
-        ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
+        ctrl_pts_top = paddle.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
-        ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
+        ctrl_pts_bottom = paddle.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
-        C = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
+        C = paddle.concat([ctrl_pts_top, ctrl_pts_bottom], axis=0)
        return C  # F x 2
-    def build_P(self, I_r_size):
+    def build_P_paddle(self, I_r_size):
-        I_r_width, I_r_height = I_r_size
+        I_r_height, I_r_width = I_r_size
-        I_r_grid_x = (np.arange(-I_r_width, I_r_width, 2) + 1.0) \
+        I_r_grid_x = (
-                     / I_r_width  # self.I_r_width
+            paddle.arange(-I_r_width, I_r_width, 2).astype('float32') + 1.0
-        I_r_grid_y = (np.arange(-I_r_height, I_r_height, 2) + 1.0) \
+        ) / I_r_width  # self.I_r_width
-                     / I_r_height  # self.I_r_height
+        I_r_grid_y = (
+            paddle.arange(-I_r_height, I_r_height, 2).astype('float32') + 1.0
+        ) / I_r_height  # self.I_r_height
        # P: self.I_r_width x self.I_r_height x 2
-        P = np.stack(np.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
+        P = paddle.stack(paddle.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
+        P = paddle.transpose(P, perm=[1, 0, 2])
        # n (= self.I_r_width x self.I_r_height) x 2
        return P.reshape([-1, 2])
-    def build_inv_delta_C(self, C):
+    def build_inv_delta_C_paddle(self, C):
        """ Return inv_delta_C which is needed to calculate T """
        F = self.F
-        hat_C = np.zeros((F, F), dtype=float)  # F x F
+        hat_C = paddle.zeros((F, F), dtype='float32')  # F x F
        for i in range(0, F):
            for j in range(i, F):
-                r = np.linalg.norm(C[i] - C[j])
+                if i == j:
-                hat_C[i, j] = r
+                    hat_C[i, j] = 1
-                hat_C[j, i] = r
+                else:
-        np.fill_diagonal(hat_C, 1)
+                    r = paddle.norm(C[i] - C[j])
-        hat_C = (hat_C**2) * np.log(hat_C)
+                    hat_C[i, j] = r
-        # print(C.shape, hat_C.shape)
+                    hat_C[j, i] = r
-        delta_C = np.concatenate(  # F+3 x F+3
+        hat_C = (hat_C**2) * paddle.log(hat_C)
+        delta_C = paddle.concat(  # F+3 x F+3
            [
-                np.concatenate(
+                paddle.concat(
-                    [np.ones((F, 1)), C, hat_C], axis=1),  # F x F+3
+                    [paddle.ones((F, 1)), C, hat_C], axis=1),  # F x F+3
-                np.concatenate(
+                paddle.concat(
-                    [np.zeros((2, 3)), np.transpose(C)], axis=1),  # 2 x F+3
+                    [paddle.zeros((2, 3)), paddle.transpose(
-                np.concatenate(
+                        C, perm=[1, 0])],
-                    [np.zeros((1, 3)), np.ones((1, F))], axis=1)  # 1 x F+3
+                    axis=1),  # 2 x F+3
+                paddle.concat(
+                    [paddle.zeros((1, 3)), paddle.ones((1, F))],
+                    axis=1)  # 1 x F+3
            ],
            axis=0)
-        inv_delta_C = np.linalg.inv(delta_C)
+        inv_delta_C = paddle.inverse(delta_C)
        return inv_delta_C  # F+3 x F+3
-    def build_P_hat(self, C, P):
+    def build_P_hat_paddle(self, C, P):
        F = self.F
        eps = self.eps
        n = P.shape[0]  # n (= self.I_r_width x self.I_r_height)
        # P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
-        P_tile = np.tile(np.expand_dims(P, axis=1), (1, F, 1))
+        P_tile = paddle.tile(paddle.unsqueeze(P, axis=1), (1, F, 1))
-        C_tile = np.expand_dims(C, axis=0)  # 1 x F x 2
+        C_tile = paddle.unsqueeze(C, axis=0)  # 1 x F x 2
        P_diff = P_tile - C_tile  # n x F x 2
        # rbf_norm: n x F
-        rbf_norm = np.linalg.norm(P_diff, ord=2, axis=2, keepdims=False)
+        rbf_norm = paddle.norm(P_diff, p=2, axis=2, keepdim=False)
        # rbf: n x F
-        rbf = np.multiply(np.square(rbf_norm), np.log(rbf_norm + eps))
+        rbf = paddle.multiply(
-        P_hat = np.concatenate([np.ones((n, 1)), P, rbf], axis=1)
+            paddle.square(rbf_norm), paddle.log(rbf_norm + eps))
+        P_hat = paddle.concat([paddle.ones((n, 1)), P, rbf], axis=1)
        return P_hat  # n x F+3
    def get_expand_tensor(self, batch_C_prime):
-        B = batch_C_prime.shape[0]
+        B, H, C = batch_C_prime.shape
-        batch_C_prime = batch_C_prime.reshape([B, -1])
+        batch_C_prime = batch_C_prime.reshape([B, H * C])
        batch_C_ex_part_tensor = self.fc(batch_C_prime)
        batch_C_ex_part_tensor = batch_C_ex_part_tensor.reshape([-1, 3, 2])
        return batch_C_ex_part_tensor
@@ -277,10 +286,8 @@ class TPS(nn.Layer):
    def forward(self, image):
        image.stop_gradient = False
-        I_r_size = [image.shape[3], image.shape[2]]
        batch_C_prime = self.loc_net(image)
-        batch_P_prime = self.grid_generator(batch_C_prime, I_r_size)
+        batch_P_prime = self.grid_generator(batch_C_prime, image.shape[2:])
        batch_P_prime = batch_P_prime.reshape(
            [-1, image.shape[2], image.shape[3], 2])
        batch_I_r = F.grid_sample(x=image, grid=batch_P_prime)

--- a/ppocr/postprocess/east_postprocess.py
+++ b/ppocr/postprocess/east_postprocess.py
@@ -19,12 +19,10 @@ from __future__ import print_function
 import numpy as np
 from .locality_aware_nms import nms_locality
 import cv2
+import paddle
 import os
 import sys
-# __dir__ = os.path.dirname(os.path.abspath(__file__))
-# sys.path.append(__dir__)
-# sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
 class EASTPostProcess(object):
@@ -113,11 +111,14 @@ class EASTPostProcess(object):
    def __call__(self, outs_dict, shape_list):
        score_list = outs_dict['f_score']
        geo_list = outs_dict['f_geo']
+        if isinstance(score_list, paddle.Tensor):
+            score_list = score_list.numpy()
+            geo_list = geo_list.numpy()
        img_num = len(shape_list)
        dt_boxes_list = []
        for ino in range(img_num):
-            score = score_list[ino].numpy()
+            score = score_list[ino]
-            geo = geo_list[ino].numpy()
+            geo = geo_list[ino]
            boxes = self.detect(
                score_map=score,
                geo_map=geo,

--- a/ppocr/postprocess/sast_postprocess.py
+++ b/ppocr/postprocess/sast_postprocess.py
@@ -24,7 +24,7 @@ sys.path.append(os.path.join(__dir__, '..'))
 import numpy as np
 from .locality_aware_nms import nms_locality
-# import lanms
+import paddle
 import cv2
 import time
@@ -276,14 +276,19 @@ class SASTPostProcess(object):
        border_list = outs_dict['f_border']
        tvo_list = outs_dict['f_tvo']
        tco_list = outs_dict['f_tco']
+        if isinstance(score_list, paddle.Tensor):
+            score_list = score_list.numpy()
+            border_list = border_list.numpy()
+            tvo_list = tvo_list.numpy()
+            tco_list = tco_list.numpy()
        img_num = len(shape_list)
        poly_lists = []
        for ino in range(img_num):
-            p_score = score_list[ino].transpose((1,2,0)).numpy()
+            p_score = score_list[ino].transpose((1,2,0))
-            p_border = border_list[ino].transpose((1,2,0)).numpy()
+            p_border = border_list[ino].transpose((1,2,0))
-            p_tvo = tvo_list[ino].transpose((1,2,0)).numpy()
+            p_tvo = tvo_list[ino].transpose((1,2,0))
-            p_tco = tco_list[ino].transpose((1,2,0)).numpy()
+            p_tco = tco_list[ino].transpose((1,2,0))
            src_h, src_w, ratio_h, ratio_w = shape_list[ino]
            poly_list = self.detect_sast(p_score, p_tvo, p_border, p_tco, ratio_w, ratio_h, src_w, src_h, 

--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,7 @@ setup(
    package_dir={'paddleocr': ''},
    include_package_data=True,
    entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
-    version='2.0',
+    version='2.0.1',
    install_requires=requirements,
    license='Apache License 2.0',
    description='Awesome OCR toolkits based on PaddlePaddle （8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',

--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -37,33 +37,51 @@ class TextDetector(object):
    def __init__(self, args):
        self.det_algorithm = args.det_algorithm
        self.use_zero_copy_run = args.use_zero_copy_run
+        pre_process_list = [{
+            'DetResizeForTest': {
+                'limit_side_len': args.det_limit_side_len,
+                'limit_type': args.det_limit_type
+            }
+        }, {
+            'NormalizeImage': {
+                'std': [0.229, 0.224, 0.225],
+                'mean': [0.485, 0.456, 0.406],
+                'scale': '1./255.',
+                'order': 'hwc'
+            }
+        }, {
+            'ToCHWImage': None
+        }, {
+            'KeepKeys': {
+                'keep_keys': ['image', 'shape']
+            }
+        }]
        postprocess_params = {}
        if self.det_algorithm == "DB":
-            pre_process_list = [{
-                'DetResizeForTest': {
-                    'limit_side_len': args.det_limit_side_len,
-                    'limit_type': args.det_limit_type
-                }
-            }, {
-                'NormalizeImage': {
-                    'std': [0.229, 0.224, 0.225],
-                    'mean': [0.485, 0.456, 0.406],
-                    'scale': '1./255.',
-                    'order': 'hwc'
-                }
-            }, {
-                'ToCHWImage': None
-            }, {
-                'KeepKeys': {
-                    'keep_keys': ['image', 'shape']
-                }
-            }]
            postprocess_params['name'] = 'DBPostProcess'
            postprocess_params["thresh"] = args.det_db_thresh
            postprocess_params["box_thresh"] = args.det_db_box_thresh
            postprocess_params["max_candidates"] = 1000
            postprocess_params["unclip_ratio"] = args.det_db_unclip_ratio
            postprocess_params["use_dilation"] = True
+        elif self.det_algorithm == "EAST":
+            postprocess_params['name'] = 'EASTPostProcess'      
+            postprocess_params["score_thresh"] = args.det_east_score_thresh
+            postprocess_params["cover_thresh"] = args.det_east_cover_thresh
+            postprocess_params["nms_thresh"] = args.det_east_nms_thresh
+        elif self.det_algorithm == "SAST":
+            postprocess_params['name'] = 'SASTPostProcess'      
+            postprocess_params["score_thresh"] = args.det_sast_score_thresh
+            postprocess_params["nms_thresh"] = args.det_sast_nms_thresh
+            self.det_sast_polygon = args.det_sast_polygon
+            if self.det_sast_polygon:
+                postprocess_params["sample_pts_num"] = 6
+                postprocess_params["expand_scale"] = 1.2
+                postprocess_params["shrink_ratio_of_width"] = 0.2
+            else:
+                postprocess_params["sample_pts_num"] = 2
+                postprocess_params["expand_scale"] = 1.0
+                postprocess_params["shrink_ratio_of_width"] = 0.3
        else:
            logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
            sys.exit(0)
@@ -149,12 +167,25 @@ class TextDetector(object):
        for output_tensor in self.output_tensors:
            output = output_tensor.copy_to_cpu()
            outputs.append(output)
-        preds = outputs[0]
-        # preds = self.predictor(img)
+        preds = {}
+        if self.det_algorithm == "EAST":
+            preds['f_geo'] = outputs[0]
+            preds['f_score'] = outputs[1]
+        elif self.det_algorithm == 'SAST':
+            preds['f_border'] = outputs[0]
+            preds['f_score'] = outputs[1]
+            preds['f_tco'] = outputs[2]
+            preds['f_tvo'] = outputs[3]
+        else:
+            preds = outputs[0]
        post_result = self.postprocess_op(preds, shape_list)
        dt_boxes = post_result[0]['points']
-        dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
+        if self.det_algorithm == "SAST" and self.det_sast_polygon:
+            dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape)
+        else:
+            dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
        elapse = time.time() - starttime
        return dt_boxes, elapse

--- a/tools/test_hubserving.py
+++ b/tools/test_hubserving.py
@@ -17,8 +17,9 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
-from ppocr.utils.utility import initial_logger
+from ppocr.utils.logging import get_logger
-logger = initial_logger()
+logger = get_logger()
 import cv2
 import numpy as np
 import time

--- a/train.sh
+++ b/train.sh
- python3 -m paddle.distributed.launch --selected_gpus '0,1,2,3,4,5,6,7'  tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml
+# for paddle.__version__ >= 2.0rc1
\ No newline at end of file
+python3 -m paddle.distributed.launch --gpus '0,1,2,3,4,5,6,7'  tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml
+# for paddle.__version__ < 2.0rc1
+# python3 -m paddle.distributed.launch --selected_gpus '0,1,2,3,4,5,6,7'  tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml