Commit b3da71f5 authored by chenxj's avatar chenxj
Browse files

update predict_system.py predict_det.py predict_cls.py predict_rec.py utility.py README.md

parent 3f11da7d
...@@ -62,6 +62,10 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/eval.py -c configs/ ...@@ -62,6 +62,10 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/eval.py -c configs/
``` ```
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --rec_image_shape=3,48,320 --warmup=1 python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --rec_image_shape=3,48,320 --warmup=1
``` ```
### 推理(ort)
```
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det.onnx" --cls_model_dir="./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx" --rec_model_dir="./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec.onnx" --use_onnx=true --use_angle_cls=true --rec_image_shape=3,48,320 --warmup=1
```
## 性能和准确率数据 ## 性能和准确率数据
检测模型测试 检测模型测试
......
...@@ -58,6 +58,64 @@ class TextClassifier(object): ...@@ -58,6 +58,64 @@ class TextClassifier(object):
padding_im[:, :, 0:resized_w] = resized_image padding_im[:, :, 0:resized_w] = resized_image
return padding_im return padding_im
def resize_norm_img_section(self, img, max_wh_ratio):
# print("rec resize for section")
imgC, imgH, imgW = self.cls_image_shape
assert imgC == img.shape[2]
rec_precision_level = os.environ.get("OCR_REC_PRECISION")
max_w = imgH * 48
# max_w = 2304
if rec_precision_level =='0':
imgW = max_w
elif rec_precision_level == '1':
imgW = int((imgH * max_wh_ratio))
if imgW <= max_w / 2:
imgW = max_w / 2
else:
imgW = max_w
elif rec_precision_level == '2':
imgW = int((imgH * max_wh_ratio))
if imgW <= max_w / 4:
imgW = max_w / 4
elif imgW > max_w / 4 and imgW <= max_w / 2:
imgW = max_w / 2
elif imgW > max_w / 2 and imgW <= 3 * max_w / 4:
imgW = 3 * max_w / 4
else:
imgW = max_w
else:
imgW = int((imgH * max_wh_ratio))
if imgW <= max_w / 6:
imgW = max_w / 6
elif imgW > max_w / 6 and imgW <= max_w / 3:
imgW = max_w / 3
elif imgW > max_w / 3 and imgW <= max_w / 2:
imgW = max_w / 2
elif imgW > max_w / 2 and imgW <= 2 * max_w / 3:
imgW = 2 * max_w / 3
elif imgW > 2 *max_w / 3 and imgW <= 5 * max_w / 6:
imgW = 5 * max_w / 6
else:
imgW = max_w
imgW = int(imgW)
h, w = img.shape[:2]
ratio = w / float(h)
if math.ceil(imgH * ratio) > imgW:
resized_w = imgW
else:
resized_w = int(math.ceil(imgH * ratio))
resized_image = cv2.resize(img, (resized_w, imgH))
resized_image = resized_image.astype('float32')
resized_image = resized_image.transpose((2, 0, 1)) / 255
resized_image -= 0.5
resized_image /= 0.5
padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
padding_im[:, :, 0:resized_w] = resized_image
return padding_im
def __call__(self, img_list): def __call__(self, img_list):
img_list = copy.deepcopy(img_list) img_list = copy.deepcopy(img_list)
img_num = len(img_list) img_num = len(img_list)
...@@ -69,23 +127,81 @@ class TextClassifier(object): ...@@ -69,23 +127,81 @@ class TextClassifier(object):
indices = np.argsort(np.array(width_list)) indices = np.argsort(np.array(width_list))
cls_res = [['', 0.0]] * img_num cls_res = [['', 0.0]] * img_num
batch_num = self.cls_batch_num if img_num <= 0:
elapse = 0 return cls_res, 0
for beg_img_no in range(0, img_num, batch_num): max_batnum = 24
min_batnum = 8
if os.environ.get("OCR_REC_MAX_BATNUM") is not None:
max_batnum = int(os.environ.get("OCR_REC_MAX_BATNUM"))
if os.environ.get("OCR_REC_MIN_BATNUM") is not None:
min_batnum = int(os.environ.get("OCR_REC_MIN_BATNUM"))
assert max_batnum / min_batnum == int(max_batnum / min_batnum), "max_batnum must be multiple of min_batnum."
img_num_left = img_num
img_no_count = 0
st = time.time()
if img_num_left > max_batnum:
batch_num = max_batnum
batch_num = int(batch_num)
for beg_img_no in range(img_no_count, int(img_num_left / batch_num) * batch_num, batch_num):
end_img_no = beg_img_no + batch_num
norm_img_batch = []
max_wh_ratio = 0
for ino in range(beg_img_no, end_img_no):
h, w = img_list[indices[ino]].shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for ino in range(beg_img_no, end_img_no):
norm_img = self.resize_norm_img_section(img_list[indices[ino]], max_wh_ratio)
norm_img = norm_img[np.newaxis, :]
norm_img_batch.append(norm_img)
end_img_no = min(img_num, beg_img_no + batch_num) norm_img_batch = np.concatenate(norm_img_batch, axis=0)
norm_img_batch = norm_img_batch.copy()
if self.use_onnx:
input_dict = {}
input_dict[self.input_tensor.name] = norm_img_batch
outputs = self.predictor.run(self.output_tensors, input_dict)
prob_out = outputs[0]
else:
self.input_tensor.copy_from_cpu(norm_img_batch)
self.predictor.run()
prob_out = self.output_tensors[0].copy_to_cpu()
self.predictor.try_shrink_memory()
cls_result = self.postprocess_op(prob_out)
for rno in range(len(cls_result)):
label, score = cls_result[rno]
cls_res[indices[beg_img_no + rno]] = [label, score]
if '180' in label and score > self.cls_thresh:
img_list[indices[beg_img_no + rno]] = cv2.rotate(
img_list[indices[beg_img_no + rno]], 1)
img_no_count = int(img_num_left / batch_num) * batch_num
img_num_left = img_num_left - int(img_num_left / batch_num) * batch_num
batch_num = math.ceil(img_num_left / min_batnum) * min_batnum
batch_num = int(batch_num)
Dnum = batch_num - img_num_left
for dno in range(Dnum):
indices = np.append(indices,img_num + dno)
cls_res.append(['', 0.0])
beg_img_no = img_no_count
end_img_no = img_num
norm_img_batch = [] norm_img_batch = []
max_wh_ratio = 0 max_wh_ratio = 0
starttime = time.time()
for ino in range(beg_img_no, end_img_no): for ino in range(beg_img_no, end_img_no):
h, w = img_list[indices[ino]].shape[0:2] h, w = img_list[indices[ino]].shape[0:2]
wh_ratio = w * 1.0 / h wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio) max_wh_ratio = max(max_wh_ratio, wh_ratio)
for ino in range(beg_img_no, end_img_no): for ino in range(beg_img_no, end_img_no):
norm_img = self.resize_norm_img(img_list[indices[ino]]) norm_img = self.resize_norm_img_section(img_list[indices[ino]], max_wh_ratio)
norm_img = norm_img[np.newaxis, :] norm_img = norm_img[np.newaxis, :]
norm_img_batch.append(norm_img) norm_img_batch.append(norm_img)
norm_img_batch = np.concatenate(norm_img_batch) norm_img_batch = np.concatenate(norm_img_batch)
if norm_img_batch.shape[0] != batch_num:
img_tmp = np.zeros((batch_num - norm_img_batch.shape[0], norm_img_batch.shape[1], norm_img_batch.shape[2], norm_img_batch.shape[3]), dtype=np.float32)
norm_img_batch = np.concatenate([norm_img_batch, img_tmp])
norm_img_batch = norm_img_batch.copy() norm_img_batch = norm_img_batch.copy()
if self.use_onnx: if self.use_onnx:
...@@ -99,14 +215,14 @@ class TextClassifier(object): ...@@ -99,14 +215,14 @@ class TextClassifier(object):
prob_out = self.output_tensors[0].copy_to_cpu() prob_out = self.output_tensors[0].copy_to_cpu()
self.predictor.try_shrink_memory() self.predictor.try_shrink_memory()
cls_result = self.postprocess_op(prob_out) cls_result = self.postprocess_op(prob_out)
elapse += time.time() - starttime
for rno in range(len(cls_result)): for rno in range(len(cls_result)):
label, score = cls_result[rno] label, score = cls_result[rno]
cls_res[indices[beg_img_no + rno]] = [label, score] cls_res[indices[beg_img_no + rno]] = [label, score]
if '180' in label and score > self.cls_thresh: if '180' in label and score > self.cls_thresh and (beg_img_no + rno) < img_num:
img_list[indices[beg_img_no + rno]] = cv2.rotate( img_list[indices[beg_img_no + rno]] = cv2.rotate(
img_list[indices[beg_img_no + rno]], 1) img_list[indices[beg_img_no + rno]], 1)
return img_list, cls_res, elapse
return img_list, cls_res, time.time() - st
def main(args): def main(args):
......
...@@ -120,6 +120,11 @@ class TextDetector(object): ...@@ -120,6 +120,11 @@ class TextDetector(object):
# print(img.shape) # print(img.shape)
img = img.copy() img = img.copy()
if self.use_onnx:
input_dict = {}
input_dict[self.input_tensor.name] = img
outputs = self.predictor.run(self.output_tensors, input_dict)
else:
self.input_tensor.copy_from_cpu(img) self.input_tensor.copy_from_cpu(img)
self.predictor.run() self.predictor.run()
paddle.device.cuda.synchronize() paddle.device.cuda.synchronize()
......
...@@ -33,6 +33,7 @@ class TextRecognizer(object): ...@@ -33,6 +33,7 @@ class TextRecognizer(object):
self.postprocess_op = build_post_process(postprocess_params) self.postprocess_op = build_post_process(postprocess_params)
self.predictor, self.input_tensor, self.output_tensors, self.config = \ self.predictor, self.input_tensor, self.output_tensors, self.config = \
utility.create_predictor(args, 'rec', logger) utility.create_predictor(args, 'rec', logger)
self.use_onnx = args.use_onnx
def resize_norm_img_section(self, img, max_wh_ratio): def resize_norm_img_section(self, img, max_wh_ratio):
# print("rec resize for section") # print("rec resize for section")
...@@ -133,6 +134,13 @@ class TextRecognizer(object): ...@@ -133,6 +134,13 @@ class TextRecognizer(object):
norm_img_batch = np.concatenate(norm_img_batch, axis=0) norm_img_batch = np.concatenate(norm_img_batch, axis=0)
norm_img_batch = norm_img_batch.copy() norm_img_batch = norm_img_batch.copy()
if self.use_onnx:
input_dict = {}
input_dict[self.input_tensor.name] = norm_img_batch
outputs = self.predictor.run(self.output_tensors,
input_dict)
preds = outputs[0]
else:
self.input_tensor.copy_from_cpu(norm_img_batch) self.input_tensor.copy_from_cpu(norm_img_batch)
self.predictor.run() self.predictor.run()
...@@ -176,6 +184,13 @@ class TextRecognizer(object): ...@@ -176,6 +184,13 @@ class TextRecognizer(object):
norm_img_batch = np.concatenate([norm_img_batch, img_tmp]) norm_img_batch = np.concatenate([norm_img_batch, img_tmp])
norm_img_batch = norm_img_batch.copy() norm_img_batch = norm_img_batch.copy()
if self.use_onnx:
input_dict = {}
input_dict[self.input_tensor.name] = norm_img_batch
outputs = self.predictor.run(self.output_tensors,
input_dict)
preds = outputs[0]
else:
self.input_tensor.copy_from_cpu(norm_img_batch) self.input_tensor.copy_from_cpu(norm_img_batch)
self.predictor.run() self.predictor.run()
......
...@@ -133,6 +133,7 @@ def main(args): ...@@ -133,6 +133,7 @@ def main(args):
img_rec_list = [] img_rec_list = []
for i in range(min_batnum * (bn + 1)): for i in range(min_batnum * (bn + 1)):
img_rec_list.append(img_warm_rec) img_rec_list.append(img_warm_rec)
cls_results = text_sys.text_classifier(img_rec_list)
rec_results = text_sys.text_recognizer(img_rec_list) rec_results = text_sys.text_recognizer(img_rec_list)
elapsewarm = time.time() - startwarm elapsewarm = time.time() - startwarm
logger.debug("warmup time:{}".format(elapsewarm)) logger.debug("warmup time:{}".format(elapsewarm))
......
...@@ -107,6 +107,16 @@ def create_predictor(args, mode, logger): ...@@ -107,6 +107,16 @@ def create_predictor(args, mode, logger):
logger.info("not find {} model file path {}".format(mode, model_dir)) logger.info("not find {} model file path {}".format(mode, model_dir))
sys.exit(0) sys.exit(0)
if args.use_onnx:
import onnxruntime as ort
model_file_path = model_dir
if not os.path.exists(model_file_path):
raise ValueError("not find model file path {}".format(
model_file_path))
sess = ort.InferenceSession(model_file_path, providers=[('ROCMExecutionProvider', {'device_id': '4'}),'CPUExecutionProvider'])
return sess, sess.get_inputs()[0], None, None
else:
model_file_path = model_dir + "/inference.pdmodel" model_file_path = model_dir + "/inference.pdmodel"
params_file_path = model_dir + "/inference.pdiparams" params_file_path = model_dir + "/inference.pdiparams"
if not os.path.exists(model_file_path): if not os.path.exists(model_file_path):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment