merge dygraph

ac8c2a89 · WenmuZhou · 88a8be12 · e174e9ed · ac8c2a89 · ac8c2a89
Commit ac8c2a89 authored Jun 30, 2021 by WenmuZhou
8 changed files
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -31,7 +31,7 @@ from ppocr.utils.utility import get_image_file_list, check_and_read_gif
 from ppocr.data import create_operators, transform
 from ppocr.postprocess import build_post_process

-import tools.infer.benchmark_utils as benchmark_utils
+# import tools.infer.benchmark_utils as benchmark_utils

 logger = get_logger()

@@ -43,7 +43,7 @@ class TextDetector(object):
        pre_process_list = [{
            'DetResizeForTest': {
                'limit_side_len': args.det_limit_side_len,
-                'limit_type': args.det_limit_type
+                'limit_type': args.det_limit_type,
            }
        }, {
            'NormalizeImage': {
@@ -100,8 +100,6 @@ class TextDetector(object):
        self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor(
            args, 'det', logger)

-        self.det_times = utility.Timer()
-
    def order_points_clockwise(self, pts):
        """
        reference from: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
@@ -158,8 +156,8 @@ class TextDetector(object):
    def __call__(self, img):
        ori_im = img.copy()
        data = {'image': img}
-        self.det_times.total_time.start()
-        self.det_times.preprocess_time.start()
+
+        st = time.time()
        data = transform(data, self.preprocess_op)
        img, shape_list = data
        if img is None:
@@ -168,16 +166,12 @@ class TextDetector(object):
        shape_list = np.expand_dims(shape_list, axis=0)
        img = img.copy()

-        self.det_times.preprocess_time.end()
-        self.det_times.inference_time.start()
-
        self.input_tensor.copy_from_cpu(img)
        self.predictor.run()
        outputs = []
        for output_tensor in self.output_tensors:
            output = output_tensor.copy_to_cpu()
            outputs.append(output)
-        self.det_times.inference_time.end()

        preds = {}
        if self.det_algorithm == "EAST":
@@ -193,8 +187,6 @@ class TextDetector(object):
        else:
            raise NotImplementedError

-        self.det_times.postprocess_time.start()
-
        self.predictor.try_shrink_memory()
        post_result = self.postprocess_op(preds, shape_list)
        dt_boxes = post_result[0]['points']
@@ -203,10 +195,8 @@ class TextDetector(object):
        else:
            dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)

-        self.det_times.postprocess_time.end()
-        self.det_times.total_time.end()
-        self.det_times.img_num += 1
-        return dt_boxes, self.det_times.total_time.value()
+        et = time.time()
+        return dt_boxes, et - st


 if __name__ == "__main__":
@@ -216,12 +206,13 @@ if __name__ == "__main__":
    count = 0
    total_time = 0
    draw_img_save = "./inference_results"
-    cpu_mem, gpu_mem, gpu_util = 0, 0, 0

-    # warmup 10 times
-    fake_img = np.random.uniform(-1, 1, [640, 640, 3]).astype(np.float32)
-    for i in range(10):
-        dt_boxes, _ = text_detector(fake_img)
+    if args.warmup:
+        img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
+        for i in range(10):
+            res = text_detector(img)
+
+    cpu_mem, gpu_mem, gpu_util = 0, 0, 0

    if not os.path.exists(draw_img_save):
        os.makedirs(draw_img_save)
@@ -239,49 +230,11 @@ if __name__ == "__main__":
            total_time += elapse
        count += 1

-        if args.benchmark:
-            cm, gm, gu = utility.get_current_memory_mb(0)
-            cpu_mem += cm
-            gpu_mem += gm
-            gpu_util += gu
-
        logger.info("Predict time of {}: {}".format(image_file, elapse))
        src_im = utility.draw_text_det_res(dt_boxes, image_file)
        img_name_pure = os.path.split(image_file)[-1]
        img_path = os.path.join(draw_img_save,
                                "det_res_{}".format(img_name_pure))
-
+        cv2.imwrite(img_path, src_im)
        logger.info("The visualized image saved in {}".format(img_path))
-    # print the information about memory and time-spent
-    if args.benchmark:
-        mems = {
-            'cpu_rss_mb': cpu_mem / count,
-            'gpu_rss_mb': gpu_mem / count,
-            'gpu_util': gpu_util * 100 / count
-        }
-    else:
-        mems = None
-    logger.info("The predict time about detection module is as follows: ")
-    det_time_dict = text_detector.det_times.report(average=True)
-    det_model_name = args.det_model_dir

-    if args.benchmark:
-        # construct log information
-        model_info = {
-            'model_name': args.det_model_dir.split('/')[-1],
-            'precision': args.precision
-        }
-        data_info = {
-            'batch_size': 1,
-            'shape': 'dynamic_shape',
-            'data_num': det_time_dict['img_num']
-        }
-        perf_info = {
-            'preprocess_time_s': det_time_dict['preprocess_time'],
-            'inference_time_s': det_time_dict['inference_time'],
-            'postprocess_time_s': det_time_dict['postprocess_time'],
-            'total_time_s': det_time_dict['total_time']
-        }
-        benchmark_log = benchmark_utils.PaddleInferBenchmark(
-            text_detector.config, model_info, data_info, perf_info, mems)
-        benchmark_log("Det")
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -257,13 +257,15 @@ def main(args):
    text_recognizer = TextRecognizer(args)
    valid_image_file_list = []
    img_list = []
-    cpu_mem, gpu_mem, gpu_util = 0, 0, 0
-    count = 0

    # warmup 10 times
-    fake_img = np.random.uniform(-1, 1, [1, 32, 320, 3]).astype(np.float32)
-    for i in range(10):
-        dt_boxes, _ = text_recognizer(fake_img)
+    if args.warmup:
+        img = np.random.uniform(0, 255, [32, 320, 3]).astype(np.uint8)
+        for i in range(10):
+            res = text_recognizer([img])
+
+    cpu_mem, gpu_mem, gpu_util = 0, 0, 0
+    count = 0

    for image_file in image_file_list:
        img, flag = check_and_read_gif(image_file)
@@ -320,7 +322,8 @@ def main(args):
            'total_time_s': rec_time_dict['total_time']
        }
        benchmark_log = benchmark_utils.PaddleInferBenchmark(
-            text_recognizer.config, model_info, data_info, perf_info, mems)
+            text_recognizer.config, model_info, data_info, perf_info, mems,
+            args.save_log_path)
        benchmark_log("Rec")



--- a/tools/infer/predict_system.py
+++ b/tools/infer/predict_system.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 import os
 import sys
+import subprocess

 __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
@@ -24,6 +25,7 @@ import cv2
 import copy
 import numpy as np
 import time
+import logging
 from PIL import Image
 import tools.infer.utility as utility
 import tools.infer.predict_rec as predict_rec
@@ -38,6 +40,9 @@ logger = get_logger()

 class TextSystem(object):
    def __init__(self, args):
+        if not args.show_log:
+            logger.setLevel(logging.INFO)
+
        self.text_detector = predict_det.TextDetector(args)
        self.text_recognizer = predict_rec.TextRecognizer(args)
        self.use_angle_cls = args.use_angle_cls
@@ -55,7 +60,7 @@ class TextSystem(object):
        ori_im = img.copy()
        dt_boxes, elapse = self.text_detector(img)

-        logger.info("dt_boxes num : {}, elapse : {}".format(
+        logger.debug("dt_boxes num : {}, elapse : {}".format(
            len(dt_boxes), elapse))
        if dt_boxes is None:
            return None, None
@@ -70,11 +75,11 @@ class TextSystem(object):
        if self.use_angle_cls and cls:
            img_crop_list, angle_list, elapse = self.text_classifier(
                img_crop_list)
-            logger.info("cls num  : {}, elapse : {}".format(
+            logger.debug("cls num  : {}, elapse : {}".format(
                len(img_crop_list), elapse))

        rec_res, elapse = self.text_recognizer(img_crop_list)
-        logger.info("rec_res num  : {}, elapse : {}".format(
+        logger.debug("rec_res num  : {}, elapse : {}".format(
            len(rec_res), elapse))
        # self.print_draw_crop_rec_res(img_crop_list, rec_res)
        filter_boxes, filter_rec_res = [], []
@@ -109,15 +114,24 @@ def sorted_boxes(dt_boxes):

 def main(args):
    image_file_list = get_image_file_list(args.image_dir)
+    image_file_list = image_file_list[args.process_id::args.total_process_num]
    text_sys = TextSystem(args)
    is_visualize = True
    font_path = args.vis_font_path
    drop_score = args.drop_score
+
+    # warm up 10 times
+    if args.warmup:
+        img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
+        for i in range(10):
+            res = text_sys(img)
+
    total_time = 0
    cpu_mem, gpu_mem, gpu_util = 0, 0, 0
    _st = time.time()
    count = 0
    for idx, image_file in enumerate(image_file_list):
+
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
@@ -226,4 +240,18 @@ def main(args):


 if __name__ == "__main__":
-    main(utility.parse_args())
+    args = utility.parse_args()
+    if args.use_mp:
+        p_list = []
+        total_process_num = args.total_process_num
+        for process_id in range(total_process_num):
+            cmd = [sys.executable, "-u"] + sys.argv + [
+                "--process_id={}".format(process_id),
+                "--use_mp={}".format(False)
+            ]
+            p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stdout)
+            p_list.append(p)
+        for p in p_list:
+            p.wait()
+    else:
+        main(args)
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -37,6 +37,7 @@ def init_args():
    parser.add_argument("--use_gpu", type=str2bool, default=True)
    parser.add_argument("--ir_optim", type=str2bool, default=True)
    parser.add_argument("--use_tensorrt", type=str2bool, default=False)
+    parser.add_argument("--min_subgraph_size", type=int, default=3)
    parser.add_argument("--precision", type=str, default="fp32")
    parser.add_argument("--gpu_mem", type=int, default=500)

@@ -105,7 +106,9 @@ def init_args():
    parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
    parser.add_argument("--cpu_threads", type=int, default=10)
    parser.add_argument("--use_pdserving", type=str2bool, default=False)
+    parser.add_argument("--warmup", type=str2bool, default=True)

+    # multi-process
    parser.add_argument("--use_mp", type=str2bool, default=False)
    parser.add_argument("--total_process_num", type=int, default=1)
    parser.add_argument("--process_id", type=int, default=0)
@@ -113,6 +116,7 @@ def init_args():
    parser.add_argument("--benchmark", type=bool, default=False)
    parser.add_argument("--save_log_path", type=str, default="./log_output/")

+    parser.add_argument("--show_log", type=str2bool, default=True)
    return parser


@@ -198,6 +202,8 @@ def create_predictor(args, mode, logger):
        model_dir = args.cls_model_dir
    elif mode == 'rec':
        model_dir = args.rec_model_dir
+    elif mode == 'table':
+        model_dir = args.table_model_dir
    else:
        model_dir = args.e2e_model_dir

@@ -231,12 +237,14 @@ def create_predictor(args, mode, logger):
            config.enable_tensorrt_engine(
                precision_mode=inference.PrecisionType.Float32,
                max_batch_size=args.max_batch_size,
-                min_subgraph_size=3)  # skip the minmum trt subgraph
-        if mode == "det" and "mobile" in model_file_path:
+                min_subgraph_size=args.min_subgraph_size)
+            # skip the minmum trt subgraph
+        if mode == "det":
            min_input_shape = {
                "x": [1, 3, 50, 50],
                "conv2d_92.tmp_0": [1, 96, 20, 20],
                "conv2d_91.tmp_0": [1, 96, 10, 10],
+                "conv2d_59.tmp_0": [1, 96, 20, 20],
                "nearest_interp_v2_1.tmp_0": [1, 96, 10, 10],
                "nearest_interp_v2_2.tmp_0": [1, 96, 20, 20],
                "nearest_interp_v2_3.tmp_0": [1, 24, 20, 20],
@@ -249,6 +257,7 @@ def create_predictor(args, mode, logger):
                "x": [1, 3, 2000, 2000],
                "conv2d_92.tmp_0": [1, 96, 400, 400],
                "conv2d_91.tmp_0": [1, 96, 200, 200],
+                "conv2d_59.tmp_0": [1, 96, 400, 400],
                "nearest_interp_v2_1.tmp_0": [1, 96, 200, 200],
                "nearest_interp_v2_2.tmp_0": [1, 96, 400, 400],
                "nearest_interp_v2_3.tmp_0": [1, 24, 400, 400],
@@ -261,6 +270,7 @@ def create_predictor(args, mode, logger):
                "x": [1, 3, 640, 640],
                "conv2d_92.tmp_0": [1, 96, 160, 160],
                "conv2d_91.tmp_0": [1, 96, 80, 80],
+                "conv2d_59.tmp_0": [1, 96, 160, 160],
                "nearest_interp_v2_1.tmp_0": [1, 96, 80, 80],
                "nearest_interp_v2_2.tmp_0": [1, 96, 160, 160],
                "nearest_interp_v2_3.tmp_0": [1, 24, 160, 160],
@@ -269,31 +279,6 @@ def create_predictor(args, mode, logger):
                "elementwise_add_7": [1, 56, 40, 40],
                "nearest_interp_v2_0.tmp_0": [1, 96, 40, 40]
            }
-        if mode == "det" and "server" in model_file_path:
-            min_input_shape = {
-                "x": [1, 3, 50, 50],
-                "conv2d_59.tmp_0": [1, 96, 20, 20],
-                "nearest_interp_v2_2.tmp_0": [1, 96, 20, 20],
-                "nearest_interp_v2_3.tmp_0": [1, 24, 20, 20],
-                "nearest_interp_v2_4.tmp_0": [1, 24, 20, 20],
-                "nearest_interp_v2_5.tmp_0": [1, 24, 20, 20]
-            }
-            max_input_shape = {
-                "x": [1, 3, 2000, 2000],
-                "conv2d_59.tmp_0": [1, 96, 400, 400],
-                "nearest_interp_v2_2.tmp_0": [1, 96, 400, 400],
-                "nearest_interp_v2_3.tmp_0": [1, 24, 400, 400],
-                "nearest_interp_v2_4.tmp_0": [1, 24, 400, 400],
-                "nearest_interp_v2_5.tmp_0": [1, 24, 400, 400]
-            }
-            opt_input_shape = {
-                "x": [1, 3, 640, 640],
-                "conv2d_59.tmp_0": [1, 96, 160, 160],
-                "nearest_interp_v2_2.tmp_0": [1, 96, 160, 160],
-                "nearest_interp_v2_3.tmp_0": [1, 24, 160, 160],
-                "nearest_interp_v2_4.tmp_0": [1, 24, 160, 160],
-                "nearest_interp_v2_5.tmp_0": [1, 24, 160, 160]
-            }
        elif mode == "rec":
            min_input_shape = {"x": [args.rec_batch_num, 3, 32, 10]}
            max_input_shape = {"x": [args.rec_batch_num, 3, 32, 2000]}
@@ -326,7 +311,10 @@ def create_predictor(args, mode, logger):
    config.disable_glog_info()

    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
+    if mode == 'table':
+        config.delete_pass("fc_fuse_pass")  # not supported for table
    config.switch_use_feed_fetch_ops(False)
+    config.switch_ir_optim(True)

    # create predictor
    predictor = inference.create_predictor(config)

--- a/tools/infer_det.py
+++ b/tools/infer_det.py
@@ -112,4 +112,4 @@ def main():

 if __name__ == '__main__':
    config, device, logger, vdl_writer = program.preprocess()
-    main()
\ No newline at end of file
+    main()
--- a/tools/infer_table.py
+++ b/tools/infer_table.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+import os
+import sys
+import json
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
+
+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+
+import paddle
+from paddle.jit import to_static
+
+from ppocr.data import create_operators, transform
+from ppocr.modeling.architectures import build_model
+from ppocr.postprocess import build_post_process
+from ppocr.utils.save_load import init_model
+from ppocr.utils.utility import get_image_file_list
+import tools.program as program
+import cv2
+
+def main(config, device, logger, vdl_writer):
+    global_config = config['Global']
+
+    # build post process
+    post_process_class = build_post_process(config['PostProcess'],
+                                            global_config)
+
+    # build model
+    if hasattr(post_process_class, 'character'):
+        config['Architecture']["Head"]['out_channels'] = len(
+            getattr(post_process_class, 'character'))
+
+    model = build_model(config['Architecture'])
+
+    init_model(config, model, logger)
+
+    # create data ops
+    transforms = []
+    use_padding = False
+    for op in config['Eval']['dataset']['transforms']:
+        op_name = list(op)[0]
+        if 'Label' in op_name:
+            continue
+        if op_name == 'KeepKeys':
+            op[op_name]['keep_keys'] = ['image']
+        if op_name == "ResizeTableImage":
+            use_padding = True
+            padding_max_len = op['ResizeTableImage']['max_len']
+        transforms.append(op)
+
+    global_config['infer_mode'] = True
+    ops = create_operators(transforms, global_config)
+
+    model.eval()
+    for file in get_image_file_list(config['Global']['infer_img']):
+        logger.info("infer_img: {}".format(file))
+        with open(file, 'rb') as f:
+            img = f.read()
+            data = {'image': img}
+        batch = transform(data, ops)
+        images = np.expand_dims(batch[0], axis=0)
+        images = paddle.to_tensor(images)
+        preds = model(images)
+        post_result = post_process_class(preds)
+        res_html_code = post_result['res_html_code']
+        res_loc = post_result['res_loc']
+        img = cv2.imread(file)
+        imgh, imgw = img.shape[0:2]
+        res_loc_final = []
+        for rno in range(len(res_loc[0])):
+            x0, y0, x1, y1 = res_loc[0][rno]
+            left = max(int(imgw * x0), 0)
+            top = max(int(imgh * y0), 0)
+            right = min(int(imgw * x1), imgw - 1)
+            bottom = min(int(imgh * y1), imgh - 1)
+            cv2.rectangle(img, (left, top), (right, bottom), (0, 0, 255), 2)
+            res_loc_final.append([left, top, right, bottom])
+        res_loc_str = json.dumps(res_loc_final)
+        logger.info("result: {}, {}".format(res_html_code, res_loc_final))
+    logger.info("success!")
+
+
+if __name__ == '__main__':
+    config, device, logger, vdl_writer = program.preprocess()
+    main(config, device, logger, vdl_writer)
+
--- a/tools/program.py
+++ b/tools/program.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -186,6 +186,7 @@ def train(config,
    model.train()

    use_srn = config['Architecture']['algorithm'] == "SRN"
+    model_type = config['Architecture']['model_type']

    if 'start_epoch' in best_model_dict:
        start_epoch = best_model_dict['start_epoch']
@@ -208,9 +209,9 @@ def train(config,
            lr = optimizer.get_lr()
            images = batch[0]
            if use_srn:
-                others = batch[-4:]
-                preds = model(images, others)
                model_average = True
+            if use_srn or model_type == 'table':
+                preds = model(images, data=batch[1:])
            else:
                preds = model(images)
            loss = loss_class(preds, batch)
@@ -232,8 +233,11 @@ def train(config,

            if cal_metric_during_train:  # only rec and cls need
                batch = [item.numpy() for item in batch]
-                post_result = post_process_class(preds, batch[1])
-                eval_class(post_result, batch)
+                if model_type == 'table':
+                    eval_class(preds, batch)
+                else:
+                    post_result = post_process_class(preds, batch[1])
+                    eval_class(post_result, batch)
                metric = eval_class.get_metric()
                train_stats.update(metric)

@@ -269,6 +273,7 @@ def train(config,
                    valid_dataloader,
                    post_process_class,
                    eval_class,
+                    model_type,
                    use_srn=use_srn)
                cur_metric_str = 'cur metric, {}'.format(', '.join(
                    ['{}: {}'.format(k, v) for k, v in cur_metric.items()]))
@@ -336,7 +341,11 @@ def train(config,
    return


-def eval(model, valid_dataloader, post_process_class, eval_class,
+def eval(model,
+         valid_dataloader,
+         post_process_class,
+         eval_class,
+         model_type,
         use_srn=False):
    model.eval()
    with paddle.no_grad():
@@ -350,19 +359,19 @@ def eval(model, valid_dataloader, post_process_class, eval_class,
                break
            images = batch[0]
            start = time.time()
-
-            if use_srn:
-                others = batch[-4:]
-                preds = model(images, others)
+            if use_srn or model_type == 'table':
+                preds = model(images, data=batch[1:])
            else:
                preds = model(images)
-
            batch = [item.numpy() for item in batch]
            # Obtain usable results from post-processing methods
-            post_result = post_process_class(preds, batch[1])
            total_time += time.time() - start
            # Evaluate the results of the current batch
-            eval_class(post_result, batch)
+            if model_type == 'table':
+                eval_class(preds, batch)
+            else:
+                post_result = post_process_class(preds, batch[1])
+                eval_class(post_result, batch)
            pbar.update(1)
            total_frame += len(images)
        # Get final metric，eg. acc or hmean
@@ -386,7 +395,7 @@ def preprocess(is_train=False):
    alg = config['Architecture']['algorithm']
    assert alg in [
        'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN',
-        'CLS', 'PGNet', 'Distillation'
+        'CLS', 'PGNet', 'Distillation', 'TableAttn'
    ]

    device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu'

--- a/tools/train.py
+++ b/tools/train.py
@@ -35,7 +35,7 @@ from ppocr.losses import build_loss
 from ppocr.optimizer import build_optimizer
 from ppocr.postprocess import build_post_process
 from ppocr.metrics import build_metric
-from ppocr.utils.save_load import init_model
+from ppocr.utils.save_load import init_model, load_dygraph_params
 import tools.program as program

 dist.get_world_size()
@@ -97,7 +97,7 @@ def main(config, device, logger, vdl_writer):
    # build metric
    eval_class = build_metric(config['Metric'])
    # load pretrain model
-    pre_best_model_dict = init_model(config, model, optimizer)
+    pre_best_model_dict = load_dygraph_params(config, model, logger, optimizer)

    logger.info('train dataloader has {} iters'.format(len(train_dataloader)))
    if valid_dataloader is not None: