"tools/vscode:/vscode.git/clone" did not exist on "eaf38b9b12cb529daa9eb920b843c7754dac38a2"
Commit d55e9065 authored by WenmuZhou

Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into doc

parents 0d6a4862 2b6c887a
===========================train_params===========================
model_name:ch_ppocr_server_v2.0_rec
python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
Global.auto_cast:amp
Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=100
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128
Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./inference/rec_inference
null:null
##
trainer:norm_train
norm_train:tools/train.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o
null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.checkpoints:
norm_export:tools/export_model.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o
quant_export:null
fpgm_export:null
distill_export:null
export1:null
export2:null
##
train_model:./inference/ch_ppocr_server_v2.0_rec_train/best_accuracy
infer_export:tools/export_model.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o
infer_quant:False
inference:tools/infer/predict_rec.py
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1|6
--use_tensorrt:True|False
--precision:fp32|int8
--rec_model_dir:
--image_dir:./inference/rec_inference
--save_log_path:./test/output/
--benchmark:True
null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,32,100]}]
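The block above is a test_tipc train_infer configuration: `===...===` banners mark sections, `##` lines separate groups, a `|` separates the alternative values to test for one key, and `null:null` fills unused slots. A minimal sketch of reading such a file into a dict (the helper `parse_tipc_config` is illustrative, not the project's actual loader):

# Minimal sketch of reading the key:value config above into a dict.
# parse_tipc_config is a hypothetical helper, not PaddleOCR's own loader.
def parse_tipc_config(path):
    params = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            # Skip blank lines, "##" group separators and "===...===" banners.
            if not line or line.startswith("#") or line.startswith("="):
                continue
            key, _, value = line.partition(":")
            # "|" separates the alternative values to sweep, e.g. "True|False".
            params[key] = value.split("|") if "|" in value else value
    return params

# e.g. params["gpu_list"] -> ["0", "0,1"], params["--cpu_threads"] -> ["1", "6"]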
@@ -74,9 +74,11 @@ def main():
     model = build_model(config['Architecture'])
     extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR"]
+    extra_input = False
     if config['Architecture']['algorithm'] == 'Distillation':
-        extra_input = config['Architecture']['Models']['Teacher'][
-            'algorithm'] in extra_input_models
+        for key in config['Architecture']["Models"]:
+            extra_input = extra_input or config['Architecture']['Models'][key][
+                'algorithm'] in extra_input_models
     else:
         extra_input = config['Architecture']['algorithm'] in extra_input_models
     if "model_type" in config['Architecture'].keys():
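The hunk above (and the matching change in the `train()` function further down) replaces the hard-coded check of the `Teacher` sub-model with a loop over every sub-model of a distillation architecture, so `extra_input` is set as soon as any of them uses an algorithm that needs extra inputs. A standalone illustration of the new logic with a made-up config dict:

# Standalone illustration of the new extra_input logic with a made-up config.
extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR"]

config = {  # hypothetical distillation setup, not a real PaddleOCR yaml
    "Architecture": {
        "algorithm": "Distillation",
        "Models": {
            "Teacher": {"algorithm": "CRNN"},
            "Student": {"algorithm": "SVTR"},
        },
    }
}

extra_input = False
if config["Architecture"]["algorithm"] == "Distillation":
    # Any sub-model that needs extra inputs flips the flag, not just the Teacher.
    for key in config["Architecture"]["Models"]:
        extra_input = extra_input or (
            config["Architecture"]["Models"][key]["algorithm"] in extra_input_models)
else:
    extra_input = config["Architecture"]["algorithm"] in extra_input_models

print(extra_input)  # True: the Student is SVTR; the old Teacher-only check gave False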
@@ -31,7 +31,7 @@ from ppocr.utils.logging import get_logger
 from tools.program import load_config, merge_config, ArgsParser


-def export_single_model(model, arch_config, save_path, logger):
+def export_single_model(model, arch_config, save_path, logger, quanter=None):
     if arch_config["algorithm"] == "SRN":
         max_text_length = arch_config["Head"]["max_text_length"]
         other_shape = [
@@ -61,6 +61,11 @@ def export_single_model(model, arch_config, save_path, logger):
                 paddle.static.InputSpec(
                     shape=[None, 3, 48, -1], dtype="float32"),
             ]
+        else:
+            other_shape = [
+                paddle.static.InputSpec(
+                    shape=[None, 3, 64, 256], dtype="float32"),
+            ]
         model = to_static(model, input_spec=other_shape)
     elif arch_config["algorithm"] == "PREN":
         other_shape = [
@@ -90,7 +95,10 @@ def export_single_model(model, arch_config, save_path, logger):
                 shape=[None] + infer_shape, dtype="float32")
         ])

-    paddle.jit.save(model, save_path)
+    if quanter is None:
+        paddle.jit.save(model, save_path)
+    else:
+        quanter.save_quantized_model(model, save_path)
     logger.info("inference model is saved to {}".format(save_path))
     return
@@ -120,7 +128,6 @@ def main():
                 char_num = char_num - 2
             out_channels_list['CTCLabelDecode'] = char_num
             out_channels_list['SARLabelDecode'] = char_num + 2
-            loss_list = config['Loss']['loss_config_list']
             config['Architecture']['Models'][key]['Head'][
                 'out_channels_list'] = out_channels_list
         else:
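The `export_single_model` changes above add a default input shape for the new branch and route saving through the quanter when one is passed. The dynamic-to-static export itself boils down to tracing the model against an `InputSpec` and calling `paddle.jit.save`; a minimal sketch with a toy layer (`SimpleNet` is made up for illustration, not part of PaddleOCR):

# Minimal sketch of the export path: to_static + InputSpec + jit.save.
# SimpleNet is a toy stand-in; the real code exports the OCR model it just built.
import paddle
from paddle.jit import to_static


class SimpleNet(paddle.nn.Layer):
    def __init__(self):
        super().__init__()
        self.conv = paddle.nn.Conv2D(3, 8, kernel_size=3, padding=1)

    def forward(self, x):
        return self.conv(x)


model = SimpleNet()
model.eval()
# [None, 3, 64, 256] mirrors the default shape added in the diff above;
# None keeps the batch dimension dynamic.
input_spec = [paddle.static.InputSpec(shape=[None, 3, 64, 256], dtype="float32")]
static_model = to_static(model, input_spec=input_spec)
paddle.jit.save(static_model, "./output/toy_inference/inference")
# With a quantizer present, the diff instead saves via
# quanter.save_quantized_model(model, save_path).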
@@ -131,6 +131,17 @@ class TextRecognizer(object):
         padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
         padding_im[:, :, 0:resized_w] = resized_image
         return padding_im

+    def resize_norm_img_svtr(self, img, image_shape):
+        imgC, imgH, imgW = image_shape
+        resized_image = cv2.resize(
+            img, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
+        resized_image = resized_image.astype('float32')
+        resized_image = resized_image.transpose((2, 0, 1)) / 255
+        resized_image -= 0.5
+        resized_image /= 0.5
+        return resized_image
+
     def resize_norm_img_srn(self, img, image_shape):
         imgC, imgH, imgW = image_shape
@@ -263,12 +274,8 @@ class TextRecognizer(object):
                 wh_ratio = w * 1.0 / h
                 max_wh_ratio = max(max_wh_ratio, wh_ratio)
             for ino in range(beg_img_no, end_img_no):
-                if self.rec_algorithm != "SRN" and self.rec_algorithm != "SAR":
-                    norm_img = self.resize_norm_img(img_list[indices[ino]],
-                                                    max_wh_ratio)
-                    norm_img = norm_img[np.newaxis, :]
-                    norm_img_batch.append(norm_img)
-                elif self.rec_algorithm == "SAR":
+                if self.rec_algorithm == "SAR":
                     norm_img, _, _, valid_ratio = self.resize_norm_img_sar(
                         img_list[indices[ino]], self.rec_image_shape)
                     norm_img = norm_img[np.newaxis, :]
@@ -276,7 +283,7 @@ class TextRecognizer(object):
                     valid_ratios = []
                     valid_ratios.append(valid_ratio)
                     norm_img_batch.append(norm_img)
-                else:
+                elif self.rec_algorithm == "SRN":
                     norm_img = self.process_image_srn(
                         img_list[indices[ino]], self.rec_image_shape, 8, 25)
                     encoder_word_pos_list = []
@@ -288,6 +295,16 @@ class TextRecognizer(object):
                     gsrm_slf_attn_bias1_list.append(norm_img[3])
                     gsrm_slf_attn_bias2_list.append(norm_img[4])
                     norm_img_batch.append(norm_img[0])
+                elif self.rec_algorithm == "SVTR":
+                    norm_img = self.resize_norm_img_svtr(
+                        img_list[indices[ino]], self.rec_image_shape)
+                    norm_img = norm_img[np.newaxis, :]
+                    norm_img_batch.append(norm_img)
+                else:
+                    norm_img = self.resize_norm_img(img_list[indices[ino]],
+                                                    max_wh_ratio)
+                    norm_img = norm_img[np.newaxis, :]
+                    norm_img_batch.append(norm_img)
             norm_img_batch = np.concatenate(norm_img_batch)
             norm_img_batch = norm_img_batch.copy()
             if self.benchmark:
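The new `resize_norm_img_svtr` skips the aspect-ratio-preserving padding that `resize_norm_img` uses and instead resizes the crop straight to the fixed input size, then normalizes it to [-1, 1]. A self-contained sketch of that preprocessing on a dummy image (requires numpy and opencv-python; the default shape is only an example):

# Self-contained sketch of the SVTR-style preprocessing added above.
import cv2
import numpy as np


def resize_norm_img_svtr(img, image_shape=(3, 64, 256)):
    imgC, imgH, imgW = image_shape
    # Resize straight to the fixed network input size (no aspect-ratio padding).
    resized = cv2.resize(img, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
    resized = resized.astype("float32")
    # HWC -> CHW, scale to [0, 1], then shift/scale to [-1, 1].
    resized = resized.transpose((2, 0, 1)) / 255
    resized -= 0.5
    resized /= 0.5
    return resized


dummy_crop = np.random.randint(0, 255, size=(48, 320, 3), dtype=np.uint8)
batch = resize_norm_img_svtr(dummy_crop)[np.newaxis, :]
print(batch.shape)  # (1, 3, 64, 256), values in [-1, 1]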
@@ -271,9 +271,10 @@ def create_predictor(args, mode, logger):
         elif mode == "rec":
             if args.rec_algorithm != "CRNN":
                 use_dynamic_shape = False
-            min_input_shape = {"x": [1, 3, 32, 10]}
-            max_input_shape = {"x": [args.rec_batch_num, 3, 32, 1536]}
-            opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]}
+            imgH = int(args.rec_image_shape.split(',')[-2])
+            min_input_shape = {"x": [1, 3, imgH, 10]}
+            max_input_shape = {"x": [args.rec_batch_num, 3, imgH, 1536]}
+            opt_input_shape = {"x": [args.rec_batch_num, 3, imgH, 320]}
         elif mode == "cls":
             min_input_shape = {"x": [1, 3, 48, 10]}
             max_input_shape = {"x": [args.rec_batch_num, 3, 48, 1024]}
@@ -300,8 +301,8 @@ def create_predictor(args, mode, logger):
     # enable memory optim
     config.enable_memory_optim()
     config.disable_glog_info()
-
     config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
+    config.delete_pass("matmul_transpose_reshape_fuse_pass")
     if mode == 'table':
         config.delete_pass("fc_fuse_pass")  # not supported for table
     config.switch_use_feed_fetch_ops(False)
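The `create_predictor` change stops hard-coding a recognition height of 32 for the TensorRT dynamic-shape bounds and derives it from `--rec_image_shape` (a string such as "3, 32, 320"), so models with taller inputs get consistent bounds. A small sketch of just that shape bookkeeping, with plain variables standing in for the CLI arguments:

# Sketch of deriving the TensorRT dynamic-shape dicts from the image-shape string.
# rec_image_shape / rec_batch_num stand in for the predict_rec.py CLI arguments.
rec_image_shape = "3, 64, 256"   # e.g. what --rec_image_shape might carry for a 64-px model
rec_batch_num = 6

imgH = int(rec_image_shape.split(',')[-2])          # 64 instead of the old fixed 32
min_input_shape = {"x": [1, 3, imgH, 10]}
max_input_shape = {"x": [rec_batch_num, 3, imgH, 1536]}
opt_input_shape = {"x": [rec_batch_num, 3, imgH, 320]}

# create_predictor later hands these dicts to Paddle Inference's
# config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape, opt_input_shape).
print(min_input_shape, max_input_shape, opt_input_shape)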
@@ -57,6 +57,8 @@ def main():
             continue
         elif op_name == 'KeepKeys':
             op[op_name]['keep_keys'] = ['image']
+        elif op_name == "SSLRotateResize":
+            op[op_name]["mode"] = "test"
         transforms.append(op)
     global_config['infer_mode'] = True
     ops = create_operators(transforms, global_config)
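The hunk above extends the loop that rewrites the dataset transforms for inference: the new branch switches `SSLRotateResize` into test mode, just as the existing branch keeps only the image for `KeepKeys`. A toy illustration of this pattern of patching a list of single-key op dicts (the op parameters here are made up for the example):

# Toy illustration of patching single-key transform-op dicts before building
# inference operators; the op parameters are invented for the example.
transforms_cfg = [
    {"DecodeImage": {"img_mode": "BGR", "channel_first": False}},
    {"SSLRotateResize": {"image_shape": [3, 48, 320]}},
    {"KeepKeys": {"keep_keys": ["image", "label"]}},
]

transforms = []
for op in transforms_cfg:
    op_name = list(op)[0]
    if op_name == "KeepKeys":
        op[op_name]["keep_keys"] = ["image"]   # no labels at inference time
    elif op_name == "SSLRotateResize":
        op[op_name]["mode"] = "test"           # switch the op to its test-time behaviour
    transforms.append(op)

print(transforms)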
@@ -202,9 +202,11 @@ def train(config,
     use_srn = config['Architecture']['algorithm'] == "SRN"
     extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR"]
+    extra_input = False
     if config['Architecture']['algorithm'] == 'Distillation':
-        extra_input = config['Architecture']['Models']['Teacher'][
-            'algorithm'] in extra_input_models
+        for key in config['Architecture']["Models"]:
+            extra_input = extra_input or config['Architecture']['Models'][key][
+                'algorithm'] in extra_input_models
     else:
         extra_input = config['Architecture']['algorithm'] in extra_input_models
     try:
@@ -129,7 +129,7 @@ def main(config, device, logger, vdl_writer):
         config['Optimizer'],
         epochs=config['Global']['epoch_num'],
         step_each_epoch=len(train_dataloader),
-        parameters=model.parameters())
+        model=model)

     # build metric
     eval_class = build_metric(config['Metric'])
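The last hunk changes the `build_optimizer` call to pass the whole model instead of `model.parameters()`, which lets the builder inspect the model (its sublayers and parameter names) rather than only a flat parameter list. A minimal sketch of a builder with that kind of interface; the body is illustrative, not PaddleOCR's actual `ppocr.optimizer` implementation:

# Minimal sketch of an optimizer builder that receives the model itself,
# mirroring the interface change above. Illustrative only, not the real
# ppocr.optimizer.build_optimizer.
import paddle


def build_optimizer(optim_config, epochs, step_each_epoch, model):
    lr = paddle.optimizer.lr.CosineAnnealingDecay(
        learning_rate=optim_config.get("lr", 0.001),
        T_max=epochs * step_each_epoch)
    # With the whole model available the builder can still get the parameter
    # list, but it can also inspect names, shapes or sublayers if the config
    # asks for per-layer treatment.
    optimizer = paddle.optimizer.Adam(learning_rate=lr,
                                      parameters=model.parameters())
    return optimizer, lr


# Usage, matching the new call site (model is the paddle.nn.Layer being trained):
# optimizer, lr_scheduler = build_optimizer(
#     config['Optimizer'], epochs=config['Global']['epoch_num'],
#     step_each_epoch=len(train_dataloader), model=model)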