Commit b3da71f5 authored by chenxj

update predict_system.py predict_det.py predict_cls.py predict_rec.py utility.py README.md

parent 3f11da7d
...@@ -62,6 +62,10 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/eval.py -c configs/
```
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --rec_image_shape=3,48,320 --warmup=1
```
### Inference (ONNX Runtime)
```
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det.onnx" --cls_model_dir="./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx" --rec_model_dir="./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec.onnx" --use_onnx=true --use_angle_cls=true --rec_image_shape=3,48,320 --warmup=1
```
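The `.onnx` files referenced above are typically exported from the Paddle inference models (e.g. with paddle2onnx). As a quick sanity check that an exported detector loads and exposes the expected input, a minimal sketch (assumes onnxruntime is installed; the CPU provider is used here so it runs anywhere, and the printed shape is only an example):

```
import onnxruntime as ort

# Load the exported detection model and inspect its input signature before
# wiring it into predict_system.py via --use_onnx=true.
sess = ort.InferenceSession("./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det.onnx",
                            providers=["CPUExecutionProvider"])
print([(i.name, i.shape) for i in sess.get_inputs()])  # e.g. [('x', [-1, 3, -1, -1])]
print(sess.get_providers())  # execution providers actually selected
```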
## Performance and Accuracy Data
Detection model test
......
...@@ -57,6 +57,64 @@ class TextClassifier(object):
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im

    def resize_norm_img_section(self, img, max_wh_ratio):
        imgC, imgH, imgW = self.cls_image_shape
        assert imgC == img.shape[2]
        # OCR_REC_PRECISION selects how coarsely the target width is bucketed:
        # '0' always pads to max_w, '1' uses halves of max_w, '2' quarters,
        # anything else sixths. Fewer buckets means fewer distinct input
        # shapes for the predictor, at the cost of more padding.
        rec_precision_level = os.environ.get("OCR_REC_PRECISION")
        max_w = imgH * 48  # widest supported input (2304 for imgH=48)
        if rec_precision_level == '0':
            imgW = max_w
        elif rec_precision_level == '1':
            imgW = int(imgH * max_wh_ratio)
            if imgW <= max_w / 2:
                imgW = max_w / 2
            else:
                imgW = max_w
        elif rec_precision_level == '2':
            imgW = int(imgH * max_wh_ratio)
            if imgW <= max_w / 4:
                imgW = max_w / 4
            elif imgW <= max_w / 2:
                imgW = max_w / 2
            elif imgW <= 3 * max_w / 4:
                imgW = 3 * max_w / 4
            else:
                imgW = max_w
        else:
            imgW = int(imgH * max_wh_ratio)
            if imgW <= max_w / 6:
                imgW = max_w / 6
            elif imgW <= max_w / 3:
                imgW = max_w / 3
            elif imgW <= max_w / 2:
                imgW = max_w / 2
            elif imgW <= 2 * max_w / 3:
                imgW = 2 * max_w / 3
            elif imgW <= 5 * max_w / 6:
                imgW = 5 * max_w / 6
            else:
                imgW = max_w
        imgW = int(imgW)
        # Resize keeping the aspect ratio, then right-pad to the bucketed width.
        h, w = img.shape[:2]
        ratio = w / float(h)
        if math.ceil(imgH * ratio) > imgW:
            resized_w = imgW
        else:
            resized_w = int(math.ceil(imgH * ratio))
        resized_image = cv2.resize(img, (resized_w, imgH))
        resized_image = resized_image.astype('float32')
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im
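The precision-level branching above is a width-bucketing rule: the natural width `imgH * max_wh_ratio` is rounded up to the next multiple of `max_w / n` (n = 1, 2, 4, or 6 for levels '0', '1', '2', and the default), capped at `max_w`. A standalone restatement for illustration; `bucket_width` is our name, not part of the source:

```
import math

def bucket_width(imgH, max_wh_ratio, level):
    """Equivalent restatement of the OCR_REC_PRECISION bucketing above."""
    max_w = imgH * 48
    n = {'0': 1, '1': 2, '2': 4}.get(level, 6)  # number of width buckets
    if n == 1:
        return max_w
    step = max_w / n
    natural = int(imgH * max_wh_ratio)
    return int(min(max_w, math.ceil(max(natural, 1) / step) * step))

# For imgH=48 (max_w=2304) and a text line with aspect ratio 10 (width 480):
for level in ['0', '1', '2', '3']:
    print(level, bucket_width(48, 10.0, level))  # 2304, 1152, 576, 768
```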

    def __call__(self, img_list):
        img_list = copy.deepcopy(img_list)
...@@ -69,44 +127,102 @@ class TextClassifier(object):
        indices = np.argsort(np.array(width_list))
        cls_res = [['', 0.0]] * img_num
        if img_num <= 0:
            return cls_res, 0
        # Batch sizing: images are consumed in full batches of max_batnum and
        # the remainder is padded up to a multiple of min_batnum, so the
        # predictor only ever sees a small, fixed set of batch shapes.
        max_batnum = 24
        min_batnum = 8
        if os.environ.get("OCR_REC_MAX_BATNUM") is not None:
            max_batnum = int(os.environ.get("OCR_REC_MAX_BATNUM"))
        if os.environ.get("OCR_REC_MIN_BATNUM") is not None:
            min_batnum = int(os.environ.get("OCR_REC_MIN_BATNUM"))
        assert max_batnum % min_batnum == 0, \
            "max_batnum must be a multiple of min_batnum."
        img_num_left = img_num
        img_no_count = 0
        st = time.time()
        if img_num_left > max_batnum:
            batch_num = int(max_batnum)
            for beg_img_no in range(img_no_count,
                                    img_num_left // batch_num * batch_num,
                                    batch_num):
                end_img_no = beg_img_no + batch_num
                norm_img_batch = []
                max_wh_ratio = 0
                for ino in range(beg_img_no, end_img_no):
                    h, w = img_list[indices[ino]].shape[0:2]
                    wh_ratio = w * 1.0 / h
                    max_wh_ratio = max(max_wh_ratio, wh_ratio)
                for ino in range(beg_img_no, end_img_no):
                    norm_img = self.resize_norm_img_section(
                        img_list[indices[ino]], max_wh_ratio)
                    norm_img = norm_img[np.newaxis, :]
                    norm_img_batch.append(norm_img)
                norm_img_batch = np.concatenate(norm_img_batch, axis=0)
                norm_img_batch = norm_img_batch.copy()
                if self.use_onnx:
                    input_dict = {}
                    input_dict[self.input_tensor.name] = norm_img_batch
                    outputs = self.predictor.run(self.output_tensors,
                                                 input_dict)
                    prob_out = outputs[0]
                else:
                    self.input_tensor.copy_from_cpu(norm_img_batch)
                    self.predictor.run()
                    prob_out = self.output_tensors[0].copy_to_cpu()
                    self.predictor.try_shrink_memory()
                cls_result = self.postprocess_op(prob_out)
                for rno in range(len(cls_result)):
                    label, score = cls_result[rno]
                    cls_res[indices[beg_img_no + rno]] = [label, score]
                    if '180' in label and score > self.cls_thresh:
                        img_list[indices[beg_img_no + rno]] = cv2.rotate(
                            img_list[indices[beg_img_no + rno]], 1)
            img_no_count = img_num_left // batch_num * batch_num
            img_num_left = img_num_left - img_no_count
        if img_num_left > 0:  # guard: np.concatenate below fails on an empty tail
            # Pad the tail batch up to the next multiple of min_batnum with
            # dummy entries so its shape stays within the fixed set.
            batch_num = int(math.ceil(img_num_left / min_batnum) * min_batnum)
            Dnum = batch_num - img_num_left
            for dno in range(Dnum):
                indices = np.append(indices, img_num + dno)
                cls_res.append(['', 0.0])
            beg_img_no = img_no_count
            end_img_no = img_num
            norm_img_batch = []
            max_wh_ratio = 0
            for ino in range(beg_img_no, end_img_no):
                h, w = img_list[indices[ino]].shape[0:2]
                wh_ratio = w * 1.0 / h
                max_wh_ratio = max(max_wh_ratio, wh_ratio)
            for ino in range(beg_img_no, end_img_no):
                norm_img = self.resize_norm_img_section(
                    img_list[indices[ino]], max_wh_ratio)
                norm_img = norm_img[np.newaxis, :]
                norm_img_batch.append(norm_img)
            norm_img_batch = np.concatenate(norm_img_batch)
            if norm_img_batch.shape[0] != batch_num:
                img_tmp = np.zeros(
                    (batch_num - norm_img_batch.shape[0],
                     norm_img_batch.shape[1], norm_img_batch.shape[2],
                     norm_img_batch.shape[3]),
                    dtype=np.float32)
                norm_img_batch = np.concatenate([norm_img_batch, img_tmp])
            norm_img_batch = norm_img_batch.copy()
            if self.use_onnx:
                input_dict = {}
                input_dict[self.input_tensor.name] = norm_img_batch
                outputs = self.predictor.run(self.output_tensors, input_dict)
                prob_out = outputs[0]
            else:
                self.input_tensor.copy_from_cpu(norm_img_batch)
                self.predictor.run()
                prob_out = self.output_tensors[0].copy_to_cpu()
                self.predictor.try_shrink_memory()
            cls_result = self.postprocess_op(prob_out)
            for rno in range(len(cls_result)):
                label, score = cls_result[rno]
                cls_res[indices[beg_img_no + rno]] = [label, score]
                # dummy entries (index >= img_num) are never rotated
                if '180' in label and score > self.cls_thresh \
                        and (beg_img_no + rno) < img_num:
                    img_list[indices[beg_img_no + rno]] = cv2.rotate(
                        img_list[indices[beg_img_no + rno]], 1)
        return img_list, cls_res, time.time() - st
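Put differently, `__call__` now consumes images in full batches of `max_batnum` and rounds the remainder up to a multiple of `min_batnum`, padding with dummy entries. A small sketch of just that arithmetic (`plan_batches` is illustrative, not part of the source):

```
import math

def plan_batches(img_num, max_batnum=24, min_batnum=8):
    """Return (list of batch sizes, number of padded dummy images)."""
    if img_num > max_batnum:
        full, left = divmod(img_num, max_batnum)
    else:
        full, left = 0, img_num
    tail = math.ceil(left / min_batnum) * min_batnum if left else 0
    return [max_batnum] * full + ([tail] if tail else []), tail - left

print(plan_batches(50))  # ([24, 24, 8], 6): two full batches, 2 images padded to 8
print(plan_batches(20))  # ([24], 4): one batch of 24 with 4 dummy images
```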

def main(args):
......
...@@ -120,15 +120,20 @@ class TextDetector(object):
        # print(img.shape)
        img = img.copy()
        if self.use_onnx:
            input_dict = {}
            input_dict[self.input_tensor.name] = img
            outputs = self.predictor.run(self.output_tensors, input_dict)
        else:
            self.input_tensor.copy_from_cpu(img)
            self.predictor.run()
            paddle.device.cuda.synchronize()
            outputs = []
            for output_tensor in self.output_tensors:
                output = output_tensor.copy_to_cpu()
                outputs.append(output)
            if self.args.benchmark:
                self.autolog.times.stamp()
        preds = {}
        if self.det_algorithm in ['DB', 'PSE']:
......
...@@ -33,6 +33,7 @@ class TextRecognizer(object):
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.output_tensors, self.config = \
            utility.create_predictor(args, 'rec', logger)
        self.use_onnx = args.use_onnx

    def resize_norm_img_section(self, img, max_wh_ratio):
        # print("rec resize for section")
...@@ -133,17 +134,24 @@ class TextRecognizer(object):
            norm_img_batch = np.concatenate(norm_img_batch, axis=0)
            norm_img_batch = norm_img_batch.copy()
            if self.use_onnx:
                input_dict = {}
                input_dict[self.input_tensor.name] = norm_img_batch
                outputs = self.predictor.run(self.output_tensors,
                                             input_dict)
                preds = outputs[0]
            else:
                self.input_tensor.copy_from_cpu(norm_img_batch)
                self.predictor.run()
                outputs = []
                for output_tensor in self.output_tensors:
                    output = output_tensor.copy_to_cpu()
                    outputs.append(output)
                if len(outputs) != 1:
                    preds = outputs
                else:
                    preds = outputs[0]
            rec_result = self.postprocess_op(preds)
            for rno in range(len(rec_result)):
                rec_res[indices[beg_img_no + rno]] = rec_result[rno]
...@@ -176,17 +184,24 @@ class TextRecognizer(object):
                norm_img_batch = np.concatenate([norm_img_batch, img_tmp])
            norm_img_batch = norm_img_batch.copy()
            if self.use_onnx:
                input_dict = {}
                input_dict[self.input_tensor.name] = norm_img_batch
                outputs = self.predictor.run(self.output_tensors,
                                             input_dict)
                preds = outputs[0]
            else:
                self.input_tensor.copy_from_cpu(norm_img_batch)
                self.predictor.run()
                outputs = []
                for output_tensor in self.output_tensors:
                    output = output_tensor.copy_to_cpu()
                    outputs.append(output)
                if len(outputs) != 1:
                    preds = outputs
                else:
                    preds = outputs[0]
            rec_result = self.postprocess_op(preds)
            for rno in range(len(rec_result)):
                rec_res[indices[beg_img_no + rno]] = rec_result[rno]
......
...@@ -133,6 +133,7 @@ def main(args):
        img_rec_list = []
        for i in range(min_batnum * (bn + 1)):
            img_rec_list.append(img_warm_rec)
        # warm up the angle classifier as well as the recognizer
        cls_results = text_sys.text_classifier(img_rec_list)
        rec_results = text_sys.text_recognizer(img_rec_list)
        elapsewarm = time.time() - startwarm
        logger.debug("warmup time:{}".format(elapsewarm))
......
...@@ -107,150 +107,160 @@ def create_predictor(args, mode, logger):
        logger.info("not find {} model file path {}".format(mode, model_dir))
        sys.exit(0)
    if args.use_onnx:
        import onnxruntime as ort
        # For ONNX inference, model_dir points directly at the .onnx file.
        model_file_path = model_dir
        if not os.path.exists(model_file_path):
            raise ValueError("not find model file path {}".format(
                model_file_path))
        # NOTE: the ROCm device id is hard-coded to '4'; CPU is the fallback.
        sess = ort.InferenceSession(
            model_file_path,
            providers=[('ROCMExecutionProvider', {'device_id': '4'}),
                       'CPUExecutionProvider'])
        return sess, sess.get_inputs()[0], None, None
    else:
        model_file_path = model_dir + "/inference.pdmodel"
        params_file_path = model_dir + "/inference.pdiparams"
        if not os.path.exists(model_file_path):
            raise ValueError("not find model file path {}".format(
                model_file_path))
        if not os.path.exists(params_file_path):
            raise ValueError("not find params file path {}".format(
                params_file_path))
        config = inference.Config(model_file_path, params_file_path)

        if hasattr(args, 'precision'):
            if args.precision == "fp16" and args.use_tensorrt:
                precision = inference.PrecisionType.Half
                print("fp16 set success!")
            elif args.precision == "int8":
                precision = inference.PrecisionType.Int8
            else:
                precision = inference.PrecisionType.Float32
        else:
            precision = inference.PrecisionType.Float32

        if args.use_gpu:
            gpu_id = get_infer_gpuid()
            if gpu_id is None:
                logger.warning(
                    "GPU is not found in current device by nvidia-smi. Please check your device or ignore it if run on jetson."
                )
            config.enable_use_gpu(args.gpu_mem, 0)
            use_dynamic_shape = True
            if mode == "det":
                min_input_shape = {
                    "x": [1, 3, 50, 50],
                    "conv2d_92.tmp_0": [1, 120, 20, 20],
                    "conv2d_91.tmp_0": [1, 24, 10, 10],
                    "conv2d_59.tmp_0": [1, 96, 20, 20],
                    "nearest_interp_v2_1.tmp_0": [1, 256, 10, 10],
                    "nearest_interp_v2_2.tmp_0": [1, 256, 20, 20],
                    "conv2d_124.tmp_0": [1, 256, 20, 20],
                    "nearest_interp_v2_3.tmp_0": [1, 64, 20, 20],
                    "nearest_interp_v2_4.tmp_0": [1, 64, 20, 20],
                    "nearest_interp_v2_5.tmp_0": [1, 64, 20, 20],
                    "elementwise_add_7": [1, 56, 2, 2],
                    "nearest_interp_v2_0.tmp_0": [1, 256, 2, 2]
                }
                max_input_shape = {
                    "x": [1, 3, 1536, 1536],
                    "conv2d_92.tmp_0": [1, 120, 400, 400],
                    "conv2d_91.tmp_0": [1, 24, 200, 200],
                    "conv2d_59.tmp_0": [1, 96, 400, 400],
                    "nearest_interp_v2_1.tmp_0": [1, 256, 200, 200],
                    "conv2d_124.tmp_0": [1, 256, 400, 400],
                    "nearest_interp_v2_2.tmp_0": [1, 256, 400, 400],
                    "nearest_interp_v2_3.tmp_0": [1, 64, 400, 400],
                    "nearest_interp_v2_4.tmp_0": [1, 64, 400, 400],
                    "nearest_interp_v2_5.tmp_0": [1, 64, 400, 400],
                    "elementwise_add_7": [1, 56, 400, 400],
                    "nearest_interp_v2_0.tmp_0": [1, 256, 400, 400]
                }
                opt_input_shape = {
                    "x": [1, 3, 640, 640],
                    "conv2d_92.tmp_0": [1, 120, 160, 160],
                    "conv2d_91.tmp_0": [1, 24, 80, 80],
                    "conv2d_59.tmp_0": [1, 96, 160, 160],
                    "nearest_interp_v2_1.tmp_0": [1, 256, 80, 80],
                    "nearest_interp_v2_2.tmp_0": [1, 256, 160, 160],
                    "conv2d_124.tmp_0": [1, 256, 160, 160],
                    "nearest_interp_v2_3.tmp_0": [1, 64, 160, 160],
                    "nearest_interp_v2_4.tmp_0": [1, 64, 160, 160],
                    "nearest_interp_v2_5.tmp_0": [1, 64, 160, 160],
                    "elementwise_add_7": [1, 56, 40, 40],
                    "nearest_interp_v2_0.tmp_0": [1, 256, 40, 40]
                }
                min_pact_shape = {
                    "nearest_interp_v2_26.tmp_0": [1, 256, 20, 20],
                    "nearest_interp_v2_27.tmp_0": [1, 64, 20, 20],
                    "nearest_interp_v2_28.tmp_0": [1, 64, 20, 20],
                    "nearest_interp_v2_29.tmp_0": [1, 64, 20, 20]
                }
                max_pact_shape = {
                    "nearest_interp_v2_26.tmp_0": [1, 256, 400, 400],
                    "nearest_interp_v2_27.tmp_0": [1, 64, 400, 400],
                    "nearest_interp_v2_28.tmp_0": [1, 64, 400, 400],
                    "nearest_interp_v2_29.tmp_0": [1, 64, 400, 400]
                }
                opt_pact_shape = {
                    "nearest_interp_v2_26.tmp_0": [1, 256, 160, 160],
                    "nearest_interp_v2_27.tmp_0": [1, 64, 160, 160],
                    "nearest_interp_v2_28.tmp_0": [1, 64, 160, 160],
                    "nearest_interp_v2_29.tmp_0": [1, 64, 160, 160]
                }
                min_input_shape.update(min_pact_shape)
                max_input_shape.update(max_pact_shape)
                opt_input_shape.update(opt_pact_shape)
            elif mode == "rec":
                if args.rec_algorithm not in ["CRNN", "SVTR_LCNet"]:
                    use_dynamic_shape = False
                imgH = int(args.rec_image_shape.split(',')[-2])
                min_input_shape = {"x": [1, 3, imgH, 10]}
                max_input_shape = {"x": [args.rec_batch_num, 3, imgH, 2304]}
                opt_input_shape = {"x": [args.rec_batch_num, 3, imgH, 320]}
                config.exp_disable_tensorrt_ops(["transpose2"])
            elif mode == "cls":
                min_input_shape = {"x": [1, 3, 48, 10]}
                max_input_shape = {"x": [args.rec_batch_num, 3, 48, 1024]}
                opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]}
            else:
                use_dynamic_shape = False
            if use_dynamic_shape:
                config.set_trt_dynamic_shape_info(
                    min_input_shape, max_input_shape, opt_input_shape)
        elif args.use_xpu:
            config.enable_xpu(10 * 1024 * 1024)
        else:
            config.disable_gpu()
            if hasattr(args, "cpu_threads"):
                config.set_cpu_math_library_num_threads(args.cpu_threads)
            else:
                # default cpu threads as 10
                config.set_cpu_math_library_num_threads(10)
            if args.enable_mkldnn:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
                if args.precision == "fp16":
                    config.enable_mkldnn_bfloat16()
        # enable memory optim
        config.enable_memory_optim()
        config.disable_glog_info()
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
        config.delete_pass("matmul_transpose_reshape_fuse_pass")
        if mode == 'table':
            config.delete_pass("fc_fuse_pass")  # not supported for table
        config.switch_use_feed_fetch_ops(False)
        config.switch_ir_optim(True)

        # create predictor
        predictor = inference.create_predictor(config)
        input_names = predictor.get_input_names()
        for name in input_names:
            input_tensor = predictor.get_input_handle(name)
        output_tensors = get_output_tensors(args, mode, predictor)
        return predictor, input_tensor, output_tensors, config
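For reference, callers consume the two return conventions in the pattern the predict_det/predict_rec hunks above follow; a condensed sketch (this wrapper is illustrative, not part of the commit):

```
def run_inference(predictor, input_tensor, output_tensors, img, use_onnx):
    """Dispatch one forward pass through either backend."""
    if use_onnx:
        # onnxruntime: input_tensor is a NodeArg, so feed a dict keyed by its
        # name; output_tensors is None here, which makes ORT return all outputs.
        return predictor.run(output_tensors, {input_tensor.name: img})
    # Paddle Inference: copy input to device, run, copy each output back.
    input_tensor.copy_from_cpu(img)
    predictor.run()
    return [t.copy_to_cpu() for t in output_tensors]
```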

def get_output_tensors(args, mode, predictor):
......