# RapidOcr

本示例通过RapidOcr模型说明如何使用ONNXRuntime Python API进行图像文本识别模型的推理,包括如何预处理、推理并获取推理结果。

## 模型简介

本示例使用了ch_PP-OCRv3_det + ch_ppocr_mobile_v2.0_cls + ch_PP-OCRv3_rec三个模型,onnx文件在Resource/Models/文件夹下,模型结构可以通过netron (https://netron.app/) 查看,并通过netron查询各个模型的输入输出。

## 预处理

在将数据输入到模型之前,需要对图像做预处理操作。预处理的目的是在进行字符识别之前对图像进行处理,包括读取图像、调整大小、填充、缩放等操作。本示例代码采用OpenCV实现了预处理操作:

### TextDetector的预处理

```python
pre_process_list = {
    "DetResizeForTest": {
        "limit_side_len": config.get("limit_side_len", 736),
        "limit_type": config.get("limit_type", "min"),
    },
    "NormalizeImage": {
        "std": [0.229, 0.224, 0.225],
        "mean": [0.485, 0.456, 0.406],
        "scale": "1./255.",
        "order": "hwc",
    },
    "ToCHWImage": None,
    "KeepKeys": {"keep_keys": ["image", "shape"]},
}
self.preprocess_op = create_operators(pre_process_list)

post_process = {
    "thresh": config.get("thresh", 0.3),
    "box_thresh": config.get("box_thresh", 0.5),
    "max_candidates": config.get("max_candidates", 1000),
    "unclip_ratio": config.get("unclip_ratio", 1.6),
    "use_dilation": config.get("use_dilation", True),
    "score_mode": config.get("score_mode", "fast"),
}
```

### TextClassifier的预处理

```python
def resize_norm_img(self, img):
    img_c, img_h, img_w = self.cls_image_shape
    h, w = img.shape[:2]
    ratio = w / float(h)
    if math.ceil(img_h * ratio) > img_w:
        resized_w = img_w
    else:
        resized_w = int(math.ceil(img_h * ratio))

    resized_image = cv2.resize(img, (resized_w, img_h))
    resized_image = resized_image.astype("float32")
    if img_c == 1:
        resized_image = resized_image / 255
        resized_image = resized_image[np.newaxis, :]
    else:
        resized_image = resized_image.transpose((2, 0, 1)) / 255

    resized_image -= 0.5
    resized_image /= 0.5
    padding_im = np.zeros((img_c, img_h, img_w), dtype=np.float32)
    padding_im[:, :, :resized_w] = resized_image
    return padding_im
```

### TextRecognizer的预处理

```python
def resize_norm_img(self, img, max_wh_ratio):
    img_channel, img_height, img_width = self.rec_image_shape
    assert img_channel == img.shape[2]

    img_width = int(img_height * max_wh_ratio)

    h, w = img.shape[:2]
    ratio = w / float(h)
    if math.ceil(img_height * ratio) > img_width:
        resized_w = img_width
    else:
        resized_w = int(math.ceil(img_height * ratio))

    resized_image = cv2.resize(img, (resized_w, img_height))
    resized_image = resized_image.astype("float32")
    resized_image = resized_image.transpose((2, 0, 1)) / 255
    resized_image -= 0.5
    resized_image /= 0.5

    padding_im = np.zeros((img_channel, img_height, img_width), dtype=np.float32)
    padding_im[:, :, 0:resized_w] = resized_image
    return padding_im
```

## 推理

### 推理分为三部分:

#### 第一部分:

TextDetector使用ch_ppocr_v3_det_infer.onnx模型,这是一个预训练的文本检测模型,用于文本检测任务。它可以检测图像中的文本区域,并返回文本框的位置和边界框信息。

```python
class TextDetector:
    ...
        post_process = {
            "thresh": config.get("thresh", 0.3),
            "box_thresh": config.get("box_thresh", 0.5),
            "max_candidates": config.get("max_candidates", 1000),
            "unclip_ratio": config.get("unclip_ratio", 1.6),
            "use_dilation": config.get("use_dilation", True),
            "score_mode": config.get("score_mode", "fast"),
        }
        self.postprocess_op = DBPostProcess(**post_process)
        self.infer = OrtInferSession(config)
    ...
```

#### 第二部分:

TextClassifier使用ch_ppocr_v2_cls_infer.onnx模型:这是一个预训练的分类器模型,用于文本分类任务。它可以用于判断文本属于哪个类别或类别的概率。

```python
class TextClassifier:
    ...
    def __init__(self, config):
        self.cls_image_shape = config["cls_image_shape"]
        self.cls_batch_num = config["cls_batch_num"]
        self.cls_thresh = config["cls_thresh"]
        self.postprocess_op = ClsPostProcess(config["label_list"])
        self.infer = OrtInferSession(config)
    ...
```

#### 第三部分:

TextRecognizer使用ch_ppocr_v3_rec_infer.onnx模型:这是一个预训练的文本识别模型,用于文本识别任务。它可以接收一个文本框的图像区域作为输入,并返回该区域中文本的识别结果。

<!-- NOTE(review): the snippet below is TextDetector's __init__ (same configuration as part one); it should be replaced with the TextRecognizer initialization code. -->

```python
class TextDetector:
    ...
    def __init__(self, config):
        pre_process_list = {
            "DetResizeForTest": {
                "limit_side_len": config.get("limit_side_len", 736),
                "limit_type": config.get("limit_type", "min"),
            },
            "NormalizeImage": {
                "std": [0.229, 0.224, 0.225],
                "mean": [0.485, 0.456, 0.406],
                "scale": "1./255.",
                "order": "hwc",
            },
            "ToCHWImage": None,
            "KeepKeys": {"keep_keys": ["image", "shape"]},
        }
        self.preprocess_op = create_operators(pre_process_list)

        post_process = {
            "thresh": config.get("thresh", 0.3),
            "box_thresh": config.get("box_thresh", 0.5),
            "max_candidates": config.get("max_candidates", 1000),
            "unclip_ratio": config.get("unclip_ratio", 1.6),
            "use_dilation": config.get("use_dilation", True),
            "score_mode": config.get("score_mode", "fast"),
        }
        self.postprocess_op = DBPostProcess(**post_process)
        self.infer = OrtInferSession(config)
    ...
```