Merge remote-tracking branch 'origin/dygraph' into dygraph

d9c5148f · Leif · 3998e131 · b7c8bfb4 · d9c5148f · d9c5148f
Commit d9c5148f authored Jul 05, 2021 by Leif
5 changed files
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -31,8 +31,6 @@ from ppocr.utils.utility import get_image_file_list, check_and_read_gif
 from ppocr.data import create_operators, transform
 from ppocr.postprocess import build_post_process
-# import tools.infer.benchmark_utils as benchmark_utils
 logger = get_logger()
@@ -100,6 +98,24 @@ class TextDetector(object):
        self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor(
            args, 'det', logger)
+        if args.benchmark:
+            import auto_log
+            pid = os.getpid()
+            self.autolog = auto_log.AutoLogger(
+                model_name="det",
+                model_precision=args.precision,
+                batch_size=1,
+                data_shape="dynamic",
+                save_path=args.save_log_path,
+                inference_config=self.config,
+                pids=pid,
+                process_name=None,
+                gpu_ids=0,
+                time_keys=[
+                    'preprocess_time', 'inference_time', 'postprocess_time'
+                ],
+                warmup=10)
    def order_points_clockwise(self, pts):
        """
        reference from: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
@@ -158,6 +174,10 @@ class TextDetector(object):
        data = {'image': img}
        st = time.time()
+        if args.benchmark:
+            self.autolog.times.start()
        data = transform(data, self.preprocess_op)
        img, shape_list = data
        if img is None:
@@ -166,12 +186,17 @@ class TextDetector(object):
        shape_list = np.expand_dims(shape_list, axis=0)
        img = img.copy()
+        if args.benchmark:
+            self.autolog.times.stamp()
        self.input_tensor.copy_from_cpu(img)
        self.predictor.run()
        outputs = []
        for output_tensor in self.output_tensors:
            output = output_tensor.copy_to_cpu()
            outputs.append(output)
+        if args.benchmark:
+            self.autolog.times.stamp()
        preds = {}
        if self.det_algorithm == "EAST":
@@ -187,7 +212,7 @@ class TextDetector(object):
        else:
            raise NotImplementedError
-        self.predictor.try_shrink_memory()
+        #self.predictor.try_shrink_memory()
        post_result = self.postprocess_op(preds, shape_list)
        dt_boxes = post_result[0]['points']
        if self.det_algorithm == "SAST" and self.det_sast_polygon:
@@ -195,6 +220,8 @@ class TextDetector(object):
        else:
            dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
+        if args.benchmark:
+            self.autolog.times.end(stamp=True)
        et = time.time()
        return dt_boxes, et - st
@@ -212,8 +239,6 @@ if __name__ == "__main__":
        for i in range(10):
            res = text_detector(img)
-    cpu_mem, gpu_mem, gpu_util = 0, 0, 0
    if not os.path.exists(draw_img_save):
        os.makedirs(draw_img_save)
    for image_file in image_file_list:
@@ -237,3 +262,6 @@ if __name__ == "__main__":
                                "det_res_{}".format(img_name_pure))
        cv2.imwrite(img_path, src_im)
        logger.info("The visualized image saved in {}".format(img_path))
+    if args.benchmark:
+        text_detector.autolog.report()
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -28,7 +28,6 @@ import traceback
 import paddle
 import tools.infer.utility as utility
-import tools.infer.benchmark_utils as benchmark_utils
 from ppocr.postprocess import build_post_process
 from ppocr.utils.logging import get_logger
 from ppocr.utils.utility import get_image_file_list, check_and_read_gif
@@ -66,8 +65,6 @@ class TextRecognizer(object):
        self.predictor, self.input_tensor, self.output_tensors, self.config = \
            utility.create_predictor(args, 'rec', logger)
-        self.rec_times = utility.Timer()
    def resize_norm_img(self, img, max_wh_ratio):
        imgC, imgH, imgW = self.rec_image_shape
        assert imgC == img.shape[2]
@@ -168,14 +165,13 @@ class TextRecognizer(object):
            width_list.append(img.shape[1] / float(img.shape[0]))
        # Sorting can speed up the recognition process
        indices = np.argsort(np.array(width_list))
-        self.rec_times.total_time.start()
        rec_res = [['', 0.0]] * img_num
        batch_num = self.rec_batch_num
+        st = time.time()
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)
            norm_img_batch = []
            max_wh_ratio = 0
-            self.rec_times.preprocess_time.start()
            for ino in range(beg_img_no, end_img_no):
                h, w = img_list[indices[ino]].shape[0:2]
                wh_ratio = w * 1.0 / h
@@ -216,23 +212,18 @@ class TextRecognizer(object):
                    gsrm_slf_attn_bias1_list,
                    gsrm_slf_attn_bias2_list,
                ]
-                self.rec_times.preprocess_time.end()
-                self.rec_times.inference_time.start()
                input_names = self.predictor.get_input_names()
                for i in range(len(input_names)):
                    input_tensor = self.predictor.get_input_handle(input_names[
                        i])
                    input_tensor.copy_from_cpu(inputs[i])
                self.predictor.run()
-                self.rec_times.inference_time.end()
                outputs = []
                for output_tensor in self.output_tensors:
                    output = output_tensor.copy_to_cpu()
                    outputs.append(output)
                preds = {"predict": outputs[2]}
            else:
-                self.rec_times.preprocess_time.end()
-                self.rec_times.inference_time.start()
                self.input_tensor.copy_from_cpu(norm_img_batch)
                self.predictor.run()
@@ -241,15 +232,11 @@ class TextRecognizer(object):
                    output = output_tensor.copy_to_cpu()
                    outputs.append(output)
                preds = outputs[0]
-            self.rec_times.inference_time.end()
-            self.rec_times.postprocess_time.start()
            rec_result = self.postprocess_op(preds)
            for rno in range(len(rec_result)):
                rec_res[indices[beg_img_no + rno]] = rec_result[rno]
-            self.rec_times.postprocess_time.end()
-            self.rec_times.img_num += int(norm_img_batch.shape[0])
+        return rec_res, time.time() - st
-        self.rec_times.total_time.end()
-        return rec_res, self.rec_times.total_time.value()
 def main(args):
@@ -278,12 +265,6 @@ def main(args):
        img_list.append(img)
    try:
        rec_res, _ = text_recognizer(img_list)
-        if args.benchmark:
-            cm, gm, gu = utility.get_current_memory_mb(0)
-            cpu_mem += cm
-            gpu_mem += gm
-            gpu_util += gu
-            count += 1
    except Exception as E:
        logger.info(traceback.format_exc())
@@ -292,38 +273,6 @@ def main(args):
    for ino in range(len(img_list)):
        logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
                                               rec_res[ino]))
-    if args.benchmark:
-        mems = {
-            'cpu_rss_mb': cpu_mem / count,
-            'gpu_rss_mb': gpu_mem / count,
-            'gpu_util': gpu_util * 100 / count
-        }
-    else:
-        mems = None
-    logger.info("The predict time about recognizer module is as follows: ")
-    rec_time_dict = text_recognizer.rec_times.report(average=True)
-    rec_model_name = args.rec_model_dir
-    if args.benchmark:
-        # construct log information
-        model_info = {
-            'model_name': args.rec_model_dir.split('/')[-1],
-            'precision': args.precision
-        }
-        data_info = {
-            'batch_size': args.rec_batch_num,
-            'shape': 'dynamic_shape',
-            'data_num': rec_time_dict['img_num']
-        }
-        perf_info = {
-            'preprocess_time_s': rec_time_dict['preprocess_time'],
-            'inference_time_s': rec_time_dict['inference_time'],
-            'postprocess_time_s': rec_time_dict['postprocess_time'],
-            'total_time_s': rec_time_dict['total_time']
-        }
-        benchmark_log = benchmark_utils.PaddleInferBenchmark(
-            text_recognizer.config, model_info, data_info, perf_info, mems)
-        benchmark_log("Rec")
 if __name__ == "__main__":

--- a/tools/infer/predict_system.py
+++ b/tools/infer/predict_system.py
@@ -33,8 +33,7 @@ import tools.infer.predict_det as predict_det
 import tools.infer.predict_cls as predict_cls
 from ppocr.utils.utility import get_image_file_list, check_and_read_gif
 from ppocr.utils.logging import get_logger
-from tools.infer.utility import draw_ocr_box_txt, get_current_memory_mb
+from tools.infer.utility import draw_ocr_box_txt, get_rotate_crop_image
-import tools.infer.benchmark_utils as benchmark_utils
 logger = get_logger()
@@ -50,39 +49,6 @@ class TextSystem(object):
        if self.use_angle_cls:
            self.text_classifier = predict_cls.TextClassifier(args)
-    def get_rotate_crop_image(self, img, points):
-        '''
-        img_height, img_width = img.shape[0:2]
-        left = int(np.min(points[:, 0]))
-        right = int(np.max(points[:, 0]))
-        top = int(np.min(points[:, 1]))
-        bottom = int(np.max(points[:, 1]))
-        img_crop = img[top:bottom, left:right, :].copy()
-        points[:, 0] = points[:, 0] - left
-        points[:, 1] = points[:, 1] - top
-        '''
-        img_crop_width = int(
-            max(
-                np.linalg.norm(points[0] - points[1]),
-                np.linalg.norm(points[2] - points[3])))
-        img_crop_height = int(
-            max(
-                np.linalg.norm(points[0] - points[3]),
-                np.linalg.norm(points[1] - points[2])))
-        pts_std = np.float32([[0, 0], [img_crop_width, 0],
-                              [img_crop_width, img_crop_height],
-                              [0, img_crop_height]])
-        M = cv2.getPerspectiveTransform(points, pts_std)
-        dst_img = cv2.warpPerspective(
-            img,
-            M, (img_crop_width, img_crop_height),
-            borderMode=cv2.BORDER_REPLICATE,
-            flags=cv2.INTER_CUBIC)
-        dst_img_height, dst_img_width = dst_img.shape[0:2]
-        if dst_img_height * 1.0 / dst_img_width >= 1.5:
-            dst_img = np.rot90(dst_img)
-        return dst_img
    def print_draw_crop_rec_res(self, img_crop_list, rec_res):
        bbox_num = len(img_crop_list)
        for bno in range(bbox_num):
@@ -103,7 +69,7 @@ class TextSystem(object):
        for bno in range(len(dt_boxes)):
            tmp_box = copy.deepcopy(dt_boxes[bno])
-            img_crop = self.get_rotate_crop_image(ori_im, tmp_box)
+            img_crop = get_rotate_crop_image(ori_im, tmp_box)
            img_crop_list.append(img_crop)
        if self.use_angle_cls and cls:
            img_crop_list, angle_list, elapse = self.text_classifier(
@@ -158,7 +124,7 @@ def main(args):
        img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
        for i in range(10):
            res = text_sys(img)
    total_time = 0
    cpu_mem, gpu_mem, gpu_util = 0, 0, 0
    _st = time.time()
@@ -175,12 +141,6 @@ def main(args):
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - starttime
        total_time += elapse
-        if args.benchmark and idx % 20 == 0:
-            cm, gm, gu = get_current_memory_mb(0)
-            cpu_mem += cm
-            gpu_mem += gm
-            gpu_util += gu
-            count += 1
        logger.info(
            str(idx) + "  Predict time of %s: %.3fs" % (image_file, elapse))
@@ -215,61 +175,6 @@ def main(args):
    logger.info("\nThe predict total time is {}".format(total_time))
    img_num = text_sys.text_detector.det_times.img_num
-    if args.benchmark:
-        mems = {
-            'cpu_rss_mb': cpu_mem / count,
-            'gpu_rss_mb': gpu_mem / count,
-            'gpu_util': gpu_util * 100 / count
-        }
-    else:
-        mems = None
-    det_time_dict = text_sys.text_detector.det_times.report(average=True)
-    rec_time_dict = text_sys.text_recognizer.rec_times.report(average=True)
-    det_model_name = args.det_model_dir
-    rec_model_name = args.rec_model_dir
-    # construct det log information
-    model_info = {
-        'model_name': args.det_model_dir.split('/')[-1],
-        'precision': args.precision
-    }
-    data_info = {
-        'batch_size': 1,
-        'shape': 'dynamic_shape',
-        'data_num': det_time_dict['img_num']
-    }
-    perf_info = {
-        'preprocess_time_s': det_time_dict['preprocess_time'],
-        'inference_time_s': det_time_dict['inference_time'],
-        'postprocess_time_s': det_time_dict['postprocess_time'],
-        'total_time_s': det_time_dict['total_time']
-    }
-    benchmark_log = benchmark_utils.PaddleInferBenchmark(
-        text_sys.text_detector.config, model_info, data_info, perf_info, mems,
-        args.save_log_path)
-    benchmark_log("Det")
-    # construct rec log information
-    model_info = {
-        'model_name': args.rec_model_dir.split('/')[-1],
-        'precision': args.precision
-    }
-    data_info = {
-        'batch_size': args.rec_batch_num,
-        'shape': 'dynamic_shape',
-        'data_num': rec_time_dict['img_num']
-    }
-    perf_info = {
-        'preprocess_time_s': rec_time_dict['preprocess_time'],
-        'inference_time_s': rec_time_dict['inference_time'],
-        'postprocess_time_s': rec_time_dict['postprocess_time'],
-        'total_time_s': rec_time_dict['total_time']
-    }
-    benchmark_log = benchmark_utils.PaddleInferBenchmark(
-        text_sys.text_recognizer.config, model_info, data_info, perf_info, mems,
-        args.save_log_path)
-    benchmark_log("Rec")
 if __name__ == "__main__":

--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -37,6 +37,7 @@ def init_args():
    parser.add_argument("--use_gpu", type=str2bool, default=True)
    parser.add_argument("--ir_optim", type=str2bool, default=True)
    parser.add_argument("--use_tensorrt", type=str2bool, default=False)
+    parser.add_argument("--min_subgraph_size", type=int, default=3)
    parser.add_argument("--precision", type=str, default="fp32")
    parser.add_argument("--gpu_mem", type=int, default=500)
@@ -124,76 +125,6 @@ def parse_args():
    return parser.parse_args()
-class Times(object):
-    def __init__(self):
-        self.time = 0.
-        self.st = 0.
-        self.et = 0.
-    def start(self):
-        self.st = time.time()
-    def end(self, accumulative=True):
-        self.et = time.time()
-        if accumulative:
-            self.time += self.et - self.st
-        else:
-            self.time = self.et - self.st
-    def reset(self):
-        self.time = 0.
-        self.st = 0.
-        self.et = 0.
-    def value(self):
-        return round(self.time, 4)
-class Timer(Times):
-    def __init__(self):
-        super(Timer, self).__init__()
-        self.total_time = Times()
-        self.preprocess_time = Times()
-        self.inference_time = Times()
-        self.postprocess_time = Times()
-        self.img_num = 0
-    def info(self, average=False):
-        logger.info("----------------------- Perf info -----------------------")
-        logger.info("total_time: {}, img_num: {}".format(self.total_time.value(
-        ), self.img_num))
-        preprocess_time = round(self.preprocess_time.value() / self.img_num,
-                                4) if average else self.preprocess_time.value()
-        postprocess_time = round(
-            self.postprocess_time.value() / self.img_num,
-            4) if average else self.postprocess_time.value()
-        inference_time = round(self.inference_time.value() / self.img_num,
-                               4) if average else self.inference_time.value()
-        average_latency = self.total_time.value() / self.img_num
-        logger.info("average_latency(ms): {:.2f}, QPS: {:2f}".format(
-            average_latency * 1000, 1 / average_latency))
-        logger.info(
-            "preprocess_latency(ms): {:.2f}, inference_latency(ms): {:.2f}, postprocess_latency(ms): {:.2f}".
-            format(preprocess_time * 1000, inference_time * 1000,
-                   postprocess_time * 1000))
-    def report(self, average=False):
-        dic = {}
-        dic['preprocess_time'] = round(
-            self.preprocess_time.value() / self.img_num,
-            4) if average else self.preprocess_time.value()
-        dic['postprocess_time'] = round(
-            self.postprocess_time.value() / self.img_num,
-            4) if average else self.postprocess_time.value()
-        dic['inference_time'] = round(
-            self.inference_time.value() / self.img_num,
-            4) if average else self.inference_time.value()
-        dic['img_num'] = self.img_num
-        dic['total_time'] = round(self.total_time.value(), 4)
-        return dic
 def create_predictor(args, mode, logger):
    if mode == "det":
        model_dir = args.det_model_dir
@@ -212,11 +143,10 @@ def create_predictor(args, mode, logger):
    model_file_path = model_dir + "/inference.pdmodel"
    params_file_path = model_dir + "/inference.pdiparams"
    if not os.path.exists(model_file_path):
-        logger.info("not find model file path {}".format(model_file_path))
+        raise ValueError("not find model file path {}".format(model_file_path))
-        sys.exit(0)
    if not os.path.exists(params_file_path):
-        logger.info("not find params file path {}".format(params_file_path))
+        raise ValueError("not find params file path {}".format(
-        sys.exit(0)
+            params_file_path))
    config = inference.Config(model_file_path, params_file_path)
@@ -236,14 +166,17 @@ def create_predictor(args, mode, logger):
            config.enable_tensorrt_engine(
                precision_mode=inference.PrecisionType.Float32,
                max_batch_size=args.max_batch_size,
-                min_subgraph_size=3)  # skip the minmum trt subgraph
+                min_subgraph_size=args.min_subgraph_size)
-        if mode == "det" and "mobile" in model_file_path:
+            # skip the minimum trt subgraph
+        if mode == "det":
            min_input_shape = {
                "x": [1, 3, 50, 50],
                "conv2d_92.tmp_0": [1, 96, 20, 20],
                "conv2d_91.tmp_0": [1, 96, 10, 10],
+                "conv2d_59.tmp_0": [1, 96, 20, 20],
                "nearest_interp_v2_1.tmp_0": [1, 96, 10, 10],
                "nearest_interp_v2_2.tmp_0": [1, 96, 20, 20],
+                "conv2d_124.tmp_0": [1, 96, 20, 20],
                "nearest_interp_v2_3.tmp_0": [1, 24, 20, 20],
                "nearest_interp_v2_4.tmp_0": [1, 24, 20, 20],
                "nearest_interp_v2_5.tmp_0": [1, 24, 20, 20],
@@ -254,7 +187,9 @@ def create_predictor(args, mode, logger):
                "x": [1, 3, 2000, 2000],
                "conv2d_92.tmp_0": [1, 96, 400, 400],
                "conv2d_91.tmp_0": [1, 96, 200, 200],
+                "conv2d_59.tmp_0": [1, 96, 400, 400],
                "nearest_interp_v2_1.tmp_0": [1, 96, 200, 200],
+                "conv2d_124.tmp_0": [1, 256, 400, 400],
                "nearest_interp_v2_2.tmp_0": [1, 96, 400, 400],
                "nearest_interp_v2_3.tmp_0": [1, 24, 400, 400],
                "nearest_interp_v2_4.tmp_0": [1, 24, 400, 400],
@@ -266,39 +201,16 @@ def create_predictor(args, mode, logger):
                "x": [1, 3, 640, 640],
                "conv2d_92.tmp_0": [1, 96, 160, 160],
                "conv2d_91.tmp_0": [1, 96, 80, 80],
+                "conv2d_59.tmp_0": [1, 96, 160, 160],
                "nearest_interp_v2_1.tmp_0": [1, 96, 80, 80],
                "nearest_interp_v2_2.tmp_0": [1, 96, 160, 160],
+                "conv2d_124.tmp_0": [1, 256, 160, 160],
                "nearest_interp_v2_3.tmp_0": [1, 24, 160, 160],
                "nearest_interp_v2_4.tmp_0": [1, 24, 160, 160],
                "nearest_interp_v2_5.tmp_0": [1, 24, 160, 160],
                "elementwise_add_7": [1, 56, 40, 40],
                "nearest_interp_v2_0.tmp_0": [1, 96, 40, 40]
            }
-        if mode == "det" and "server" in model_file_path:
-            min_input_shape = {
-                "x": [1, 3, 50, 50],
-                "conv2d_59.tmp_0": [1, 96, 20, 20],
-                "nearest_interp_v2_2.tmp_0": [1, 96, 20, 20],
-                "nearest_interp_v2_3.tmp_0": [1, 24, 20, 20],
-                "nearest_interp_v2_4.tmp_0": [1, 24, 20, 20],
-                "nearest_interp_v2_5.tmp_0": [1, 24, 20, 20]
-            }
-            max_input_shape = {
-                "x": [1, 3, 2000, 2000],
-                "conv2d_59.tmp_0": [1, 96, 400, 400],
-                "nearest_interp_v2_2.tmp_0": [1, 96, 400, 400],
-                "nearest_interp_v2_3.tmp_0": [1, 24, 400, 400],
-                "nearest_interp_v2_4.tmp_0": [1, 24, 400, 400],
-                "nearest_interp_v2_5.tmp_0": [1, 24, 400, 400]
-            }
-            opt_input_shape = {
-                "x": [1, 3, 640, 640],
-                "conv2d_59.tmp_0": [1, 96, 160, 160],
-                "nearest_interp_v2_2.tmp_0": [1, 96, 160, 160],
-                "nearest_interp_v2_3.tmp_0": [1, 24, 160, 160],
-                "nearest_interp_v2_4.tmp_0": [1, 24, 160, 160],
-                "nearest_interp_v2_5.tmp_0": [1, 24, 160, 160]
-            }
        elif mode == "rec":
            min_input_shape = {"x": [args.rec_batch_num, 3, 32, 10]}
            max_input_shape = {"x": [args.rec_batch_num, 3, 32, 2000]}
@@ -328,11 +240,11 @@ def create_predictor(args, mode, logger):
    # enable memory optim
    config.enable_memory_optim()
-    config.disable_glog_info()
+    #config.disable_glog_info()
    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
    if mode == 'table':
-        config.delete_pass("fc_fuse_pass") # not supported for table    
+        config.delete_pass("fc_fuse_pass")  # not supported for table
    config.switch_use_feed_fetch_ops(False)
    config.switch_ir_optim(True)
@@ -597,29 +509,39 @@ def draw_boxes(image, boxes, scores=None, drop_score=0.5):
    return image
-def get_current_memory_mb(gpu_id=None):
+def get_rotate_crop_image(img, points):
-    """
+    '''
-    It is used to Obtain the memory usage of the CPU and GPU during the running of the program.
+    img_height, img_width = img.shape[0:2]
-    And this function Current program is time-consuming.
+    left = int(np.min(points[:, 0]))
-    """
+    right = int(np.max(points[:, 0]))
-    import pynvml
+    top = int(np.min(points[:, 1]))
-    import psutil
+    bottom = int(np.max(points[:, 1]))
-    import GPUtil
+    img_crop = img[top:bottom, left:right, :].copy()
-    pid = os.getpid()
+    points[:, 0] = points[:, 0] - left
-    p = psutil.Process(pid)
+    points[:, 1] = points[:, 1] - top
-    info = p.memory_full_info()
+    '''
-    cpu_mem = info.uss / 1024. / 1024.
+    assert len(points) == 4, "shape of points must be 4*2"
-    gpu_mem = 0
+    img_crop_width = int(
-    gpu_percent = 0
+        max(
-    if gpu_id is not None:
+            np.linalg.norm(points[0] - points[1]),
-        GPUs = GPUtil.getGPUs()
+            np.linalg.norm(points[2] - points[3])))
-        gpu_load = GPUs[gpu_id].load
+    img_crop_height = int(
-        gpu_percent = gpu_load
+        max(
-        pynvml.nvmlInit()
+            np.linalg.norm(points[0] - points[3]),
-        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
+            np.linalg.norm(points[1] - points[2])))
-        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
+    pts_std = np.float32([[0, 0], [img_crop_width, 0],
-        gpu_mem = meminfo.used / 1024. / 1024.
+                          [img_crop_width, img_crop_height],
-    return round(cpu_mem, 4), round(gpu_mem, 4), round(gpu_percent, 4)
+                          [0, img_crop_height]])
+    M = cv2.getPerspectiveTransform(points, pts_std)
+    dst_img = cv2.warpPerspective(
+        img,
+        M, (img_crop_width, img_crop_height),
+        borderMode=cv2.BORDER_REPLICATE,
+        flags=cv2.INTER_CUBIC)
+    dst_img_height, dst_img_width = dst_img.shape[0:2]
+    if dst_img_height * 1.0 / dst_img_width >= 1.5:
+        dst_img = np.rot90(dst_img)
+    return dst_img
 if __name__ == '__main__':

--- a/tools/train.py
+++ b/tools/train.py
@@ -35,7 +35,7 @@ from ppocr.losses import build_loss
 from ppocr.optimizer import build_optimizer
 from ppocr.postprocess import build_post_process
 from ppocr.metrics import build_metric
-from ppocr.utils.save_load import init_model
+from ppocr.utils.save_load import init_model, load_dygraph_params
 import tools.program as program
 dist.get_world_size()
@@ -97,7 +97,7 @@ def main(config, device, logger, vdl_writer):
    # build metric
    eval_class = build_metric(config['Metric'])
    # load pretrain model
-    pre_best_model_dict = init_model(config, model, optimizer)
+    pre_best_model_dict = load_dygraph_params(config, model, logger, optimizer)
    logger.info('train dataloader has {} iters'.format(len(train_dataloader)))
    if valid_dataloader is not None: