# -*- coding: utf-8 -*-
"""PP-OCRv5 text detection and recognition pipeline running on MIGraphX.

The module contains:

* ``BaseRecLabelDecode`` / ``CTCLabelDecode`` - map recognition logits to text.
* ``TextRecgnizer`` - runs the recognition ONNX model on cropped text lines.
* ``TextDetector`` - runs the DB detection ONNX model and extracts text boxes.
* ``PPOcrV5`` - glues detection, cropping, recognition and visualisation.
"""
import os
import re
import time

import cv2
import migraphx
import numpy as np
import pyclipper
from PIL import Image
from shapely.geometry import Polygon

# Patterns are compiled once because they are evaluated per character.
_REVERSE_KEEP_RE = re.compile("[a-zA-Z0-9 :*./%+-]")
_ALNUM_RE = re.compile("[a-zA-Z0-9]")
_DIGIT_RE = re.compile("[0-9]")


def AllocateOutputMemory(model):
    """Allocate one GPU buffer per entry reported by ``model.get_outputs()``.

    Returns a dict mapping each output name to the buffer created by
    ``migraphx.allocate_gpu`` for that output's shape.
    """
    return {
        key: migraphx.allocate_gpu(s=shape)
        for key, shape in model.get_outputs().items()
    }


def _check_onnx_path(model_path):
    """Raise a descriptive error when ``model_path`` is not a usable .onnx file."""
    if not os.path.exists(model_path):
        raise FileNotFoundError("model file not found: {}".format(model_path))
    if not model_path.endswith(".onnx"):
        raise ValueError(
            "only .onnx models are supported, got: {}".format(model_path)
        )


class BaseRecLabelDecode(object):
    """Map indices in the model's output space to characters (text space)."""

    def __init__(self, character_dict_path=None, use_space_char=False):
        """Build the character table.

        Args:
            character_dict_path: UTF-8 text file with one character per line.
                When ``None`` a built-in digits + lowercase alphabet is used.
            use_space_char: append a space to the table (only applied when a
                dictionary file is given).
        """
        self.beg_str = "sos"
        self.end_str = "eos"
        self.reverse = False
        self.character_str = []

        if character_dict_path is None:
            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
            dict_character = list(self.character_str)
        else:
            with open(character_dict_path, "rb") as fin:
                for line in fin.readlines():
                    line = line.decode("utf-8").strip("\n").strip("\r\n")
                    self.character_str.append(line)
            if use_space_char:
                self.character_str.append(" ")
            dict_character = list(self.character_str)
            # Only meaningful when a path was given; testing it outside this
            # branch would raise TypeError for ``character_dict_path=None``.
            if "arabic" in character_dict_path:
                self.reverse = True

        dict_character = self.add_special_char(dict_character)
        self.dict = {char: i for i, char in enumerate(dict_character)}
        self.character = dict_character

    def pred_reverse(self, pred):
        """Reverse ``pred`` while keeping runs of latin/digit characters intact."""
        pred_re = []
        c_current = ""
        for c in pred:
            if not _REVERSE_KEEP_RE.search(c):
                if c_current != "":
                    pred_re.append(c_current)
                pred_re.append(c)
                c_current = ""
            else:
                c_current += c
        if c_current != "":
            pred_re.append(c_current)
        return "".join(pred_re[::-1])

    def add_special_char(self, dict_character):
        """Hook for subclasses to add special tokens; the base adds none."""
        return dict_character

    def get_word_info(self, text, selection):
        """Group decoded characters into words.

        Args:
            text: decoded string.
            selection: boolean mask over the time steps that produced ``text``.

        Returns:
            ``(word_list, word_col_list, state_list)`` where each word is a
            list of characters, each ``word_col`` entry holds the selected
            time-step index of the matching character, and each state is one
            of ``"cn"``, ``"en&num"`` or ``"splitter"``.
        """
        state = None
        word_content = []
        word_col_content = []
        word_list = []
        word_col_list = []
        state_list = []
        valid_col = np.where(selection)[0]

        for c_i, char in enumerate(text):
            if "\u4e00" <= char <= "\u9fff":
                c_state = "cn"
            elif _ALNUM_RE.search(char):
                c_state = "en&num"
            else:
                c_state = "splitter"

            # A dot between digits (decimal point) stays part of the word.
            if (
                char == "."
                and state == "en&num"
                and c_i + 1 < len(text)
                and _DIGIT_RE.search(text[c_i + 1])
            ):
                c_state = "en&num"
            # A hyphen following latin/digit characters stays part of the word.
            if char == "-" and state == "en&num":
                c_state = "en&num"

            if state is None:
                state = c_state

            if state != c_state:
                if len(word_content) != 0:
                    word_list.append(word_content)
                    word_col_list.append(word_col_content)
                    state_list.append(state)
                    word_content = []
                    word_col_content = []
                state = c_state

            if state != "splitter":
                word_content.append(char)
                word_col_content.append(valid_col[c_i])

        if len(word_content) != 0:
            word_list.append(word_content)
            word_col_list.append(word_col_content)
            state_list.append(state)
        return word_list, word_col_list, state_list

    def decode(
        self,
        text_index,
        text_prob=None,
        is_remove_duplicate=False,
        return_word_box=False,
    ):
        """Convert per-sample index sequences into ``(text, confidence)`` tuples.

        Args:
            text_index: sequence of 1-D numpy index arrays, one per sample.
            text_prob: optional matching probabilities.
            is_remove_duplicate: collapse consecutive repeated indices.
            return_word_box: additionally return word grouping information.

        Returns:
            A list with one tuple per sample: ``(text, mean_conf)`` or, with
            ``return_word_box``, ``(text, mean_conf, [seq_len, word_list,
            word_col_list, state_list])``.
        """
        result_list = []
        ignored_tokens = self.get_ignored_tokens()
        for batch_idx in range(len(text_index)):
            indices = text_index[batch_idx]
            selection = np.ones(len(indices), dtype=bool)
            if is_remove_duplicate:
                selection[1:] = indices[1:] != indices[:-1]
            for ignored_token in ignored_tokens:
                selection &= indices != ignored_token

            char_list = [self.character[text_id] for text_id in indices[selection]]
            if text_prob is not None:
                conf_list = text_prob[batch_idx][selection]
            else:
                conf_list = [1] * len(selection)
            if len(conf_list) == 0:
                conf_list = [0]

            text = "".join(char_list)
            if self.reverse:
                text = self.pred_reverse(text)

            if return_word_box:
                word_list, word_col_list, state_list = self.get_word_info(
                    text, selection
                )
                result_list.append(
                    (
                        text,
                        np.mean(conf_list).tolist(),
                        [len(indices), word_list, word_col_list, state_list],
                    )
                )
            else:
                result_list.append((text, np.mean(conf_list).tolist()))
        return result_list

    def get_ignored_tokens(self):
        """Indices dropped while decoding (index 0)."""
        return [0]


class CTCLabelDecode(BaseRecLabelDecode):
    """Greedy CTC decoder; index 0 is the ``"blank"`` token."""

    def __init__(self, character_dict_path=None, use_space_char=False, **kwargs):
        super(CTCLabelDecode, self).__init__(character_dict_path, use_space_char)

    def __call__(self, preds, label=None, return_word_box=False, *args, **kwargs):
        """Decode recognition outputs into text.

        Steps: take the arg-max class and its probability at every time step,
        map the indices to characters, and return the strings.

        Args:
            preds: one entry per image batch; each entry is indexed as
                ``(num_crops, time_steps, num_classes)``.
            label: optional ground-truth index sequences, decoded once and
                returned alongside every batch.
            return_word_box: forward to ``decode``; requires the keyword
                argument ``wh_ratio_list`` (per batch, a list of per-mini-batch
                ratio lists as produced by ``TextRecgnizer.__call__``).

        Returns:
            ``(batch_text_list, batch_label_list)``.
        """
        batch_text_list = []
        batch_label_list = []
        # Decode the labels once; re-assigning ``label`` inside the loop would
        # feed already-decoded tuples back into ``decode`` on the next batch.
        decoded_label = self.decode(label) if label is not None else None

        for b in range(len(preds)):
            pred = np.asarray(preds[b])
            if pred.ndim != 3 or pred.shape[0] == 0:
                # A batch without any crop has nothing to decode.
                text = []
            else:
                preds_idx = pred.argmax(axis=2)
                preds_prob = pred.max(axis=2)
                text = self.decode(
                    preds_idx,
                    preds_prob,
                    is_remove_duplicate=True,
                    return_word_box=return_word_box,
                )
                if return_word_box:
                    ratios = self._flatten_ratios(kwargs["wh_ratio_list"][b])
                    for rec_idx, rec in enumerate(text):
                        rec[2][0] = rec[2][0] / ratios[rec_idx]

            batch_text_list.append(text)
            if decoded_label is not None:
                batch_label_list.append(decoded_label)
        return batch_text_list, batch_label_list

    @staticmethod
    def _flatten_ratios(ratio_chunks):
        """Flatten per-mini-batch ratio lists into one ratio per crop."""
        ratios = []
        for chunk in ratio_chunks:
            if isinstance(chunk, (list, tuple, np.ndarray)):
                ratios.extend(chunk)
            else:
                ratios.append(chunk)
        return ratios

    def add_special_char(self, dict_character):
        """Prepend the CTC blank token so that it gets index 0."""
        dict_character = ["blank"] + dict_character
        return dict_character


class TextRecgnizer(object):
    """Text-line recognition model wrapper (MIGraphX)."""

    def __init__(
        self,
        rec_model_path,
        rec_batch_num=1,
        rec_input_size=(48, 480),  # (h, w)
        rec_algorithm="SVTR_LCNet",
        precision_mode="fp16",
        **kwargs
    ):
        """Parse, optionally quantise, compile and warm up the model.

        Args:
            rec_model_path: path to the recognition ``.onnx`` file.
            rec_batch_num: fixed batch size the model is compiled for.
            rec_input_size: ``(height, width)`` of the model input.
            rec_algorithm: stored only; not used for dispatch in this class.
            precision_mode: ``"fp16"`` enables ``migraphx.quantize_fp16``.
            **kwargs: ``offload_copy`` (default ``True``) and ``device_id``
                (default ``0``).

        Raises:
            FileNotFoundError: ``rec_model_path`` does not exist.
            ValueError: ``rec_model_path`` is not an ``.onnx`` file.
        """
        self.rec_algorithm = rec_algorithm
        self.rec_input_size = rec_input_size
        self.precision_mode = precision_mode
        self.rec_batch_num = rec_batch_num
        self.offload_copy = kwargs.get("offload_copy", True)

        # Fail here instead of leaving a half-initialised object behind.
        _check_onnx_path(rec_model_path)

        self.rec_input_name = "x"
        input_shape = [
            rec_batch_num,
            3,
            self.rec_input_size[0],
            self.rec_input_size[1],
        ]
        self.rec_model = migraphx.parse_onnx(
            rec_model_path, map_input_dims={self.rec_input_name: input_shape}
        )
        if self.precision_mode == "fp16":
            migraphx.quantize_fp16(self.rec_model)
        self.rec_model.compile(
            t=migraphx.get_target("gpu"),
            offload_copy=self.offload_copy,
            device_id=kwargs.get("device_id", 0),
        )
        inputs = self.rec_model.get_inputs()
        outputs = self.rec_model.get_outputs()

        # Warm up once before real inference.
        in_data = np.ones(input_shape, dtype=np.float32)
        if not self.offload_copy:
            self.d_mem = AllocateOutputMemory(self.rec_model)
            self.d_mem[self.rec_input_name] = migraphx.to_gpu(
                migraphx.argument(in_data)
            )
            self.rec_model.run(self.d_mem)
        else:
            self.rec_model.run({self.rec_input_name: in_data})

        print("Text recognizition model info:")
        print(f"    inputs info:{inputs}")
        print(f"    outputs info:{outputs}")

    def __call__(self, batch_img_list):
        """Recognise every crop of every image batch.

        Steps: preprocess each crop, stack crops into mini-batches of
        ``rec_batch_num``, run inference, collect the per-crop feature maps.

        Args:
            batch_img_list: list (one entry per source image) of lists of
                crops.

        Returns:
            ``(batch_outputs, batch_ratios)``. ``batch_outputs[b]`` is a numpy
            array stacking the model output of every crop of image ``b``;
            ``batch_ratios[b]`` is a list with one resize-ratio list per
            mini-batch. Both are empty lists for empty input.
        """
        if len(batch_img_list) == 0:
            # Callers unpack two values, so always return a pair.
            return [], []

        input_batch = self.rec_batch_num
        batch_outputs_pre = []
        batch_max_wh_ratio_pre = []
        for crops in batch_img_list:
            im_count = len(crops)
            batch_outputs = []
            batch_max_wh_ratio = []
            for beg_img_no in range(0, im_count, input_batch):
                end_img_no = min(im_count, beg_img_no + input_batch)
                max_wh_ratio = []
                batch_norm_imgs = [
                    self.preprocess(crops[ino], max_wh_ratio)[np.newaxis, :].astype(
                        np.float32
                    )
                    for ino in range(beg_img_no, end_img_no)
                ]
                if len(batch_norm_imgs) == 0:
                    continue
                batch_max_wh_ratio.append(max_wh_ratio)

                valid = len(batch_norm_imgs)
                norm_img_batch = np.concatenate(batch_norm_imgs)
                if valid < input_batch:
                    # The model is compiled for a fixed batch size: pad the
                    # last mini-batch and discard the padded outputs below.
                    filler = np.zeros(
                        (input_batch - valid,) + norm_img_batch.shape[1:],
                        dtype=np.float32,
                    )
                    norm_img_batch = np.concatenate([norm_img_batch, filler])
                norm_img_batch = np.ascontiguousarray(norm_img_batch)

                if not self.offload_copy:
                    print("offload copy model")
                    self.d_mem[self.rec_input_name] = migraphx.to_gpu(
                        migraphx.argument(norm_img_batch)
                    )
                    results = self.rec_model.run(self.d_mem)
                else:
                    results = self.rec_model.run(
                        {self.rec_input_name: norm_img_batch}
                    )
                output = np.array(results[0])
                batch_outputs.extend(output[:valid])

            batch_outputs_pre.append(np.array(batch_outputs))
            batch_max_wh_ratio_pre.append(batch_max_wh_ratio)
        return batch_outputs_pre, batch_max_wh_ratio_pre

    def preprocess(self, img, max_wh_ratio):
        """Resize a crop to the model height, normalise and right-pad it.

        Args:
            img: ``H x W x 3`` crop.
            max_wh_ratio: list that receives the applied resize ratio
                (``model_height / crop_height``); mutated in place.

        Returns:
            ``float32`` array of shape ``(3, model_h, model_w)`` with values in
            ``[-1, 1]`` inside the resized region and ``0`` in the padding.

        Raises:
            TypeError: ``max_wh_ratio`` is not a list.
        """
        if not isinstance(max_wh_ratio, list):
            raise TypeError("max_wh_ratio must be list")

        max_h, max_w = self.rec_input_size
        padding_im = np.zeros((3, max_h, max_w), dtype=np.float32)

        h, w = img.shape[:2]
        if h == 0 or w == 0:
            # Degenerate crop (e.g. a collapsed box): return blank input and a
            # neutral ratio so crops stay aligned one-to-one with their boxes.
            max_wh_ratio.append(1.0)
            return padding_im

        # Resize along the height axis; width follows the aspect ratio and is
        # limited to the model width (at least one pixel wide).
        ratio = max_h / h
        new_w = min(max(int(w * ratio), 1), max_w)
        max_wh_ratio.append(ratio)

        resized_image = cv2.resize(img, (new_w, max_h)).astype("float32")
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im[:, :, 0:new_w] = resized_image
        return padding_im


class TextDetector(object):
    """DB text detection model wrapper (MIGraphX) with box post-processing."""

    def __init__(
        self,
        det_model_path,
        db_input_size=(640, 640),
        thresh=0.3,
        box_thresh=0.7,
        max_candidates=1000,
        unclip_ratio=2.0,
        use_dilation=False,
        score_mode="fast",
        box_type="quad",
        precision_mode="float32",
        **kwargs,
    ):
        """Parse, optionally quantise, compile and warm up the model.

        Args:
            det_model_path: path to the detection ``.onnx`` file.
            db_input_size: ``(height, width)`` of the model input.
            thresh: pixel threshold applied to the probability map.
            box_thresh: minimum mean score for a box to be kept.
            max_candidates: maximum number of contours examined.
            unclip_ratio: expansion ratio used by ``unclip``.
            use_dilation: dilate the binary map with a 2x2 kernel.
            score_mode: ``"fast"`` or ``"slow"``.
            box_type: ``"quad"`` or ``"poly"``.
            precision_mode: ``"fp16"`` enables ``migraphx.quantize_fp16``.
            **kwargs: ``offload_copy`` (default ``True``) and ``device_id``
                (default ``0``).

        Raises:
            FileNotFoundError: ``det_model_path`` does not exist.
            ValueError: ``det_model_path`` is not an ``.onnx`` file.
        """
        self.thresh = thresh
        self.db_input_size = db_input_size
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
        self.min_size = 3
        self.score_mode = score_mode
        self.box_type = box_type
        self.precision_mode = precision_mode
        assert score_mode in [
            "slow",
            "fast",
        ], "Score mode not support: {}".format(score_mode)

        self.dilation_kernel = None if not use_dilation else np.array([[1, 1], [1, 1]])
        self.offload_copy = kwargs.get("offload_copy", True)

        # Fail here instead of leaving a half-initialised object behind.
        _check_onnx_path(det_model_path)

        self.det_input_name = "x"
        input_shape = [1, 3, db_input_size[0], db_input_size[1]]
        self.db_model = migraphx.parse_onnx(
            det_model_path, map_input_dims={self.det_input_name: input_shape}
        )
        inputs = self.db_model.get_inputs()
        outputs = self.db_model.get_outputs()
        if self.precision_mode == "fp16":
            migraphx.quantize_fp16(self.db_model)
        self.db_model.compile(
            t=migraphx.get_target("gpu"),
            offload_copy=self.offload_copy,
            device_id=kwargs.get("device_id", 0),
        )

        # Warm up once before real inference.
        in_data = np.ones(input_shape, dtype=np.float32)
        if not self.offload_copy:
            self.d_mem = AllocateOutputMemory(self.db_model)
            self.d_mem[self.det_input_name] = migraphx.to_gpu(
                migraphx.argument(in_data)
            )
            self.db_model.run(self.d_mem)
        else:
            self.db_model.run({self.det_input_name: in_data})

        print("Detection model info:")
        print(f"    inputs info:{inputs}")
        print(f"    outputs info:{outputs}")

    def polygons_from_bitmap(self, pred, _bitmap, ratio_w, ratio_h, dest_width, dest_height):
        """Extract polygon boxes from a binary map.

        Args:
            pred: 2-D probability map used for scoring.
            _bitmap: 2-D binary map the contours are taken from.
            ratio_w, ratio_h: resize ratios used to map back to the source.
            dest_width, dest_height: source image size used for clipping.

        Returns:
            ``(boxes, scores)`` where each box is a list of ``[x, y]`` points.
        """
        bitmap = _bitmap
        boxes = []
        scores = []

        # Text-region extraction. ``[-2]`` selects the contour list for both
        # the 2-value and the 3-value return conventions of findContours.
        contours = cv2.findContours(
            (bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
        )[-2]

        for contour in contours[: self.max_candidates]:
            epsilon = 0.002 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)
            points = approx.reshape((-1, 2))
            if points.shape[0] < 4:
                continue

            score = self.box_score_fast(pred, points.reshape(-1, 2))
            if self.box_thresh > score:
                continue

            box = self.unclip(points, self.unclip_ratio)
            # Keep only expansions that yield exactly one path.
            if len(box) != 1:
                continue
            box = np.array(box).reshape(-1, 2)
            if len(box) == 0:
                continue

            _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
            if sside < self.min_size + 2:
                continue

            box[:, 0] = np.clip(np.round(box[:, 0] / ratio_w), 0, dest_width)
            box[:, 1] = np.clip(np.round(box[:, 1] / ratio_h), 0, dest_height)
            boxes.append(box.tolist())
            scores.append(score)
        return boxes, scores

    def boxes_from_bitmap(self, pred, _bitmap, ratio_w, ratio_h, dest_width, dest_height):
        """Extract 4-point boxes from a binary map.

        Arguments match ``polygons_from_bitmap``.

        Returns:
            ``(boxes, scores)`` where ``boxes`` is an ``int32`` array of shape
            ``(N, 4, 2)``.
        """
        bitmap = _bitmap
        # ``[-2]`` is the contour list for both findContours conventions.
        contours = cv2.findContours(
            (bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
        )[-2]

        num_contours = min(len(contours), self.max_candidates)
        boxes = []
        scores = []
        for index in range(num_contours):
            contour = contours[index]
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)

            if self.score_mode == "fast":
                score = self.box_score_fast(pred, points.reshape(-1, 2))
            else:
                score = self.box_score_slow(pred, contour)
            if self.box_thresh > score:
                continue

            box = self.unclip(points, self.unclip_ratio)
            # An empty result would crash minAreaRect; several paths are
            # ambiguous. Keep only single-path expansions.
            if len(box) != 1:
                continue
            box = np.array(box).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue

            box = np.array(box)
            box[:, 0] = np.clip(np.round(box[:, 0] / ratio_w), 0, dest_width)
            box[:, 1] = np.clip(np.round(box[:, 1] / ratio_h), 0, dest_height)
            boxes.append(box.astype("int32"))
            scores.append(score)
        return np.array(boxes, dtype="int32"), scores

    def unclip(self, box, unclip_ratio):
        """Expand ``box`` outwards by ``area * unclip_ratio / perimeter``.

        Returns the list of paths produced by ``pyclipper`` (empty for a
        degenerate polygon with zero perimeter).
        """
        poly = Polygon(box)
        if poly.length == 0:
            return []
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        return offset.Execute(distance)

    def get_mini_boxes(self, contour):
        """Return the minimum-area rectangle of ``contour``.

        Returns:
            ``(box, short_side)`` where ``box`` lists the four corners in the
            order top-left, top-right, bottom-right, bottom-left.
        """
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda p: p[0])

        # ``points`` is sorted by x: the first two are the left corners, the
        # last two are the right corners. Order each pair by y.
        if points[1][1] > points[0][1]:
            top_left, bottom_left = 0, 1
        else:
            top_left, bottom_left = 1, 0
        if points[3][1] > points[2][1]:
            top_right, bottom_right = 2, 3
        else:
            top_right, bottom_right = 3, 2

        box = [
            points[top_left],
            points[top_right],
            points[bottom_right],
            points[bottom_left],
        ]
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        """Mean of ``bitmap`` inside the polygon ``_box`` (bounding-box crop)."""
        h, w = bitmap.shape[:2]
        box = _box.copy()
        xmin = np.clip(np.floor(box[:, 0].min()).astype("int32"), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype("int32"), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype("int32"), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype("int32"), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype("int32"), 1)
        return cv2.mean(bitmap[ymin : ymax + 1, xmin : xmax + 1], mask)[0]

    def box_score_slow(self, bitmap, contour):
        """Mean of ``bitmap`` inside the exact ``contour`` polygon."""
        h, w = bitmap.shape[:2]
        contour = np.reshape(contour.copy(), (-1, 2))

        xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
        xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
        ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
        ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        contour[:, 0] = contour[:, 0] - xmin
        contour[:, 1] = contour[:, 1] - ymin
        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1)
        return cv2.mean(bitmap[ymin : ymax + 1, xmin : xmax + 1], mask)[0]

    def box_standardization(self, boxes_batch, shape_list):
        """Reorder every box to top-left, top-right, bottom-right, bottom-left.

        Args:
            boxes_batch: per-batch collections of boxes (arrays or lists of
                ``[x, y]`` points).
            shape_list: per-batch ``(src_h, src_w, ratio_h, ratio_w)``.

        Returns:
            One list of ``float32`` ``(4, 2)`` arrays per batch.
        """
        # NOTE(review): the end of this method was lost in the source text
        # (everything after the "diff = y-x" comment). The corner assignment
        # below follows that surviving comment; the original also unpacked
        # ``src_h``/``src_w`` from ``shape_list`` and declared a second
        # accumulator (``dt_batch_rects``) whose use is unknown - confirm
        # against the original file.
        dt_batch_boxs = []
        for b in range(len(boxes_batch)):
            det_boxs = []
            for box in boxes_batch[b]:
                if isinstance(box, list):
                    box = np.array(box)
                rect = np.zeros((4, 2), dtype="float32")
                s = box.sum(axis=1)
                # Smallest x + y is the top-left, largest is the bottom-right.
                rect[0] = box[np.argmin(s)]
                rect[2] = box[np.argmax(s)]
                tmp = np.delete(box, (np.argmin(s), np.argmax(s)), axis=0)
                # diff = y - x; bottom-left: y > x, top-right: y < x
                diff = np.diff(np.array(tmp), axis=1)
                rect[1] = tmp[np.argmin(diff)]
                rect[3] = tmp[np.argmax(diff)]
                det_boxs.append(rect)
            dt_batch_boxs.append(det_boxs)
        return dt_batch_boxs

    def __call__(self, src_img):
        """Detect text boxes in ``src_img``.

        Returns:
            The result of the post-processing step: one list of 4-point boxes
            per batch entry.
        """
        # NOTE(review): this method was lost in the source text and has been
        # reconstructed from its caller (``PPOcrV5.__call__`` invokes the
        # detector with the source image) and from the inference pattern used
        # by ``TextRecgnizer.__call__`` - confirm against the original file.
        data = self.preprocess(src_img)
        if not self.offload_copy:
            self.d_mem[self.det_input_name] = migraphx.to_gpu(
                migraphx.argument(data["image"])
            )
            results = self.db_model.run(self.d_mem)
        else:
            results = self.db_model.run({self.det_input_name: data["image"]})
        pred = np.array(results[0])
        return self.postprocess(pred, [data["shape"]])

    def postprocess(self, pred, shape_list):
        """Turn the probability map into sorted, standardised boxes.

        Args:
            pred: model output; a 4-D array is reduced to its first channel.
            shape_list: per-batch ``(src_h, src_w, ratio_h, ratio_w)``.

        Raises:
            ValueError: ``box_type`` is neither ``"quad"`` nor ``"poly"``.
        """
        # NOTE(review): the header of this method (name, signature and the
        # lines before the thresholding) was lost in the source text. The
        # name ``postprocess`` and the channel selection are reconstructed;
        # the code from the thresholding onwards is from the source.
        if pred.ndim == 4:
            pred = pred[:, 0, :, :]
        segmentation = pred > self.thresh

        boxes_batch = []
        for batch_index in range(pred.shape[0]):
            src_h, src_w, ratio_h, ratio_w = shape_list[batch_index]
            if self.dilation_kernel is not None:
                mask = cv2.dilate(
                    np.array(segmentation[batch_index]).astype(np.uint8),
                    self.dilation_kernel,
                )
            else:
                mask = segmentation[batch_index]

            if self.box_type == "poly":
                boxes, scores = self.polygons_from_bitmap(
                    pred[batch_index], mask, ratio_w, ratio_h, src_w, src_h
                )
            elif self.box_type == "quad":
                boxes, scores = self.boxes_from_bitmap(
                    pred[batch_index], mask, ratio_w, ratio_h, src_w, src_h
                )
            else:
                raise ValueError("box_type can only be one of ['quad', 'poly']")
            boxes_batch.append(boxes)

        det_box_batch = self.sorted_boxes(boxes_batch)
        dt_boxes = self.box_standardization(det_box_batch, shape_list)
        return dt_boxes

    def preprocess(
        self,
        src_img,
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
        scale: float = 1.0 / 255,
    ):
        """Resize, pad and normalise ``src_img`` for the detector.

        Returns:
            dict with ``"image"`` (``float32``, shape ``(1, 3, H, W)``) and
            ``"shape"`` (array ``[src_h, src_w, ratio_h, ratio_w]``).
        """
        img = src_img.copy()
        src_h, src_w, _ = img.shape
        res_img, (ratio_h, ratio_w) = self.resize_image(img)

        norm_img = (res_img * scale - mean) / std
        image_data = norm_img.transpose(2, 0, 1)
        image_data = np.expand_dims(image_data, axis=0).astype(np.float32)
        image_data = np.ascontiguousarray(image_data)

        return {
            "image": image_data,
            "shape": np.array([src_h, src_w, ratio_h, ratio_w]),
        }

    def resize_image(self, img):
        """Fit ``img`` into the model input, keeping the aspect ratio.

        The resized sides are rounded to multiples of 32 (at least 32, at most
        the model input size) and the image is placed in the top-left corner
        of a zero-filled ``(input_h, input_w, 3)`` buffer.

        Returns:
            ``(padded_image, [ratio_h, ratio_w])``.

        Raises:
            ValueError: the image is empty or ``cv2.resize`` fails.
        """
        h, w, _ = img.shape
        if h == 0 or w == 0:
            raise ValueError("resize error")

        # db_input_size is (height, width), matching the compiled input shape.
        in_h, in_w = self.db_input_size
        # Scale by the tighter side so the result fits in both dimensions.
        # For a square input this equals scaling the longer side to the input.
        ratio = min(float(in_h) / h, float(in_w) / w)
        resize_h = int(h * ratio)
        resize_w = int(w * ratio)
        resize_h = min(max(int(round(resize_h / 32) * 32), 32), in_h)
        resize_w = min(max(int(round(resize_w / 32) * 32), 32), in_w)

        try:
            img = cv2.resize(img, (int(resize_w), int(resize_h)))
        except cv2.error as err:
            print(img.shape, resize_w, resize_h)
            raise ValueError("resize error") from err

        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        im_pad = np.zeros((in_h, in_w, 3), np.float32)
        im_pad[:resize_h, :resize_w, :] = img
        return im_pad, [ratio_h, ratio_w]

    def sorted_boxes(self, dt_boxes):
        """Sort the boxes of every batch into reading order.

        Args:
            dt_boxes: per-batch collections of boxes.

        Returns:
            A list with one entry per batch: an ``int32`` array ``(N, 4, 2)``
            for 4-point boxes (``(0, 4, 2)`` when the batch is empty) or a
            list of ``int32`` arrays when boxes differ in point count.
        """
        return [self._sort_reading_order(boxes) for boxes in dt_boxes]

    @staticmethod
    def _sort_reading_order(boxes):
        """Sort one batch top-to-bottom, then left-to-right within each row."""
        boxes = [np.asarray(box, dtype=np.int32) for box in boxes]
        if not boxes:
            return np.zeros((0, 4, 2), dtype=np.int32)

        # Reference point (first corner) and height of every box.
        top_left = np.array([box[0] for box in boxes])
        heights = np.array([abs(int(box[2][1]) - int(box[0][1])) for box in boxes])
        # Boxes whose y differs by less than 0.6 x the median height are
        # treated as belonging to the same text row.
        row_tolerance = np.median(heights) * 0.6

        # Primary key y, secondary key x.
        order = np.lexsort((top_left[:, 0], top_left[:, 1]))

        final_order = []
        current_row = [order[0]]
        for idx in order[1:]:
            if abs(top_left[idx, 1] - top_left[current_row[-1], 1]) < row_tolerance:
                current_row.append(idx)
            else:
                final_order.extend(sorted(current_row, key=lambda i: top_left[i, 0]))
                current_row = [idx]
        # Flush the last row.
        final_order.extend(sorted(current_row, key=lambda i: top_left[i, 0]))

        ordered = [boxes[i] for i in final_order]
        if len({box.shape for box in ordered}) == 1:
            return np.stack(ordered)
        return ordered


class PPOcrV5():
    """End-to-end OCR: detect boxes, crop, recognise, filter and visualise."""

    def __init__(
        self,
        det_model_path: str,
        rec_model_path: str,
        char_dict_path: str = "../Resource/ppocr_keys_v5.txt",
        db_input_size: tuple = (640, 640),
        rec_input_size: tuple = (48, 720),
        seg_thresh: float = 0.3,
        box_thresh: float = 0.7,
        precision_mode: str = 'fp16',
        offload_copy: bool = True,
        **kwargs
    ):
        """Create the detector, the recogniser and the CTC decoder.

        Args:
            det_model_path: path of the text detection model.
            rec_model_path: path of the text recognition model.
            char_dict_path: character dictionary used by the decoder.
            db_input_size: detection model input size.
            rec_input_size: recognition model input size ``(h, w)``.
            seg_thresh: DBNet pixel segmentation threshold.
            box_thresh: text box score threshold.
            precision_mode: forwarded to both models.
            offload_copy: forwarded to both models.
            **kwargs: optional ``max_candidates``, ``unclip_ratio``,
                ``use_dilation``, ``score_mode``, ``box_type`` (forwarded to
                the detector) and ``text_score_thresh`` (minimum recognition
                confidence to keep a result, default ``0.5``).
        """
        self.seg_thres = seg_thresh
        self.box_thresh = box_thresh
        self.db_input_size = db_input_size
        self.offload_copy = offload_copy

        # ``kwargs`` is a dict, so options must be read as keys; an attribute
        # test (``hasattr``) never matches and would drop every override.
        self.max_candidates = kwargs.get("max_candidates", 1000)
        self.unclip_ratio = kwargs.get("unclip_ratio", 2.0)
        self.use_dilation = kwargs.get("use_dilation", False)
        self.score_mode = kwargs.get("score_mode", "fast")
        self.box_type = kwargs.get("box_type", "quad")
        self.text_score_thresh = kwargs.get("text_score_thresh", 0.5)

        self.db_detector = TextDetector(
            det_model_path,
            db_input_size,
            thresh=self.seg_thres,
            box_thresh=self.box_thresh,
            max_candidates=self.max_candidates,
            unclip_ratio=self.unclip_ratio,
            box_type=self.box_type,
            use_dilation=self.use_dilation,
            score_mode=self.score_mode,
            precision_mode=precision_mode,
            offload_copy=offload_copy,
        )
        self.text_extractor = TextRecgnizer(
            rec_model_path=rec_model_path,
            rec_input_size=rec_input_size,
            precision_mode=precision_mode,
            offload_copy=offload_copy,
        )
        self.ctc_decoder = CTCLabelDecode(
            character_dict_path=char_dict_path, use_space_char=True
        )

    def __call__(self, src_img):
        """Run OCR on ``src_img`` and return a visualisation image.

        Recognised texts and the elapsed time are printed. The returned image
        shows the source with the kept boxes next to a white canvas with the
        recognised text; ``src_img`` itself is left untouched.
        """
        start = time.time()
        dt_boxs = self.db_detector(src_img)
        batch_img_list = self.detection_roi_crop(src_img, dt_boxs)
        batch_outputs_pre, batch_max_wh_ratio_pre = self.text_extractor(batch_img_list)
        batch_text_list, batch_label_list = self.ctc_decoder(
            batch_outputs_pre,
            return_word_box=False,
            wh_ratio_list=batch_max_wh_ratio_pre,
        )
        end = time.time()

        batch_text_out = []
        batch_boxes_out = []
        for boxes, rec_results in zip(dt_boxs, batch_text_list):
            text_out = []
            boxes_out = []
            for box, rec_result in zip(boxes, rec_results):
                if rec_result[1] >= self.text_score_thresh:
                    text_out.append(rec_result)
                    boxes_out.append(box)
            batch_text_out.append(text_out)
            batch_boxes_out.append(boxes_out)

        for text_out in batch_text_out:
            for text, score in text_out:
                print("{}, {:.3f}".format(text, score))

        # Draw on a copy so the caller's image is not modified.
        res_img = self.vis_boxes(batch_boxes_out, src_img.copy())
        res_img = self.vis_oct_text(batch_text_out, batch_boxes_out, res_img)
        print(f"[Time info] elapsed:{(end-start)*1000:.4f} ms")
        return res_img

    def detection_roi_crop(self, src_img, boxes):
        """Crop the axis-aligned region spanned by each box's first and third point.

        Coordinates are clamped to the image; a collapsed box yields an empty
        crop so that crops stay aligned one-to-one with ``boxes``.

        Returns:
            One list of crops per batch entry of ``boxes``.
        """
        img_h, img_w = src_img.shape[:2]
        batch_cut_imgs = []
        for batch_boxes in boxes:
            crop_imgs = []
            for tl, tr, br, bl in batch_boxes:
                x1 = min(max(int(tl[0]), 0), img_w)
                y1 = min(max(int(tl[1]), 0), img_h)
                x2 = min(max(int(br[0]), 0), img_w)
                y2 = min(max(int(br[1]), 0), img_h)
                crop_imgs.append(src_img[y1:y2, x1:x2, :])
            batch_cut_imgs.append(crop_imgs)
        return batch_cut_imgs

    def vis_oct_text(self, batch_text, batch_boxes, src_img, fornt_path="../Resource/fonts/simfang.ttf"):
        """Render the recognised text on a white canvas next to ``src_img``.

        Args:
            batch_text: per-batch lists of ``(text, confidence)``.
            batch_boxes: per-batch lists of boxes matching ``batch_text``.
            src_img: image placed on the left of the result.
            fornt_path: TrueType font file used for drawing.

        Returns:
            ``src_img`` and the text canvas concatenated horizontally.
        """
        from PIL import Image, ImageDraw, ImageFont

        canvas = np.full(src_img.shape, 255, dtype=np.uint8)
        pil_img = Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(pil_img)

        fonts = {}  # font size -> loaded font, avoids reloading the file
        for texts, boxes in zip(batch_text, batch_boxes):
            for (text, conf), box in zip(texts, boxes):
                f_start = (int(box[0][0]), int(box[0][1]))
                box_h = int(box[2][1]) - int(box[0][1])
                # A font size of zero or less is rejected by PIL.
                font_size = max(int(box_h * 0.9), 1)
                if font_size not in fonts:
                    fonts[font_size] = ImageFont.truetype(
                        fornt_path, font_size, encoding="utf-8"
                    )
                draw.text(f_start, text, font=fonts[font_size], fill=(0, 255, 0))

        res_img = np.concatenate([src_img, np.array(pil_img)], axis=1)
        return res_img

    def vis_boxes(self, boxes, img, colors=(0, 255, 0), thickness=2):
        """Draw each box's axis-aligned rectangle on ``img`` (in place) and return it."""
        for batch_boxes in boxes:
            for tl, tr, br, bl in batch_boxes:
                cv2.rectangle(
                    img,
                    (int(tl[0]), int(tl[1])),
                    (int(br[0]), int(br[1])),
                    colors,
                    thickness,
                )
        return img


if __name__ == '__main__':
    det_onnx_path = "../Resource/Models/ppocrv5_server_det_infer.onnx"
    rec_onnx_path = "../Resource/Models/ppocrv5_server_rec_infer.onnx"
    image_path = "../Resource/Images/demo.png"

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising on a missing/bad file.
        raise FileNotFoundError("cannot read image: {}".format(image_path))

    ppocrv5 = PPOcrV5(det_onnx_path, rec_onnx_path, offload_copy=True, precision_mode="fp16")
    res_img = ppocrv5(img)
    cv2.imwrite("res.jpg", res_img)