Commit b3da71f5 authored by chenxj's avatar chenxj
Browse files

update predict_system.py predict_det.py predict_cls.py predict_rec.py utility.py README.md

parent 3f11da7d
...@@ -62,6 +62,10 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/eval.py -c configs/ ...@@ -62,6 +62,10 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/eval.py -c configs/
``` ```
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --rec_image_shape=3,48,320 --warmup=1 python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --rec_image_shape=3,48,320 --warmup=1
``` ```
### 推理(ort)
```
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det.onnx" --cls_model_dir="./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx" --rec_model_dir="./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec.onnx" --use_onnx=true --use_angle_cls=true --rec_image_shape=3,48,320 --warmup=1
```
## 性能和准确率数据 ## 性能和准确率数据
检测模型测试 检测模型测试
......
...@@ -58,6 +58,64 @@ class TextClassifier(object): ...@@ -58,6 +58,64 @@ class TextClassifier(object):
padding_im[:, :, 0:resized_w] = resized_image padding_im[:, :, 0:resized_w] = resized_image
return padding_im return padding_im
def resize_norm_img_section(self, img, max_wh_ratio):
# print("rec resize for section")
imgC, imgH, imgW = self.cls_image_shape
assert imgC == img.shape[2]
rec_precision_level = os.environ.get("OCR_REC_PRECISION")
max_w = imgH * 48
# max_w = 2304
if rec_precision_level =='0':
imgW = max_w
elif rec_precision_level == '1':
imgW = int((imgH * max_wh_ratio))
if imgW <= max_w / 2:
imgW = max_w / 2
else:
imgW = max_w
elif rec_precision_level == '2':
imgW = int((imgH * max_wh_ratio))
if imgW <= max_w / 4:
imgW = max_w / 4
elif imgW > max_w / 4 and imgW <= max_w / 2:
imgW = max_w / 2
elif imgW > max_w / 2 and imgW <= 3 * max_w / 4:
imgW = 3 * max_w / 4
else:
imgW = max_w
else:
imgW = int((imgH * max_wh_ratio))
if imgW <= max_w / 6:
imgW = max_w / 6
elif imgW > max_w / 6 and imgW <= max_w / 3:
imgW = max_w / 3
elif imgW > max_w / 3 and imgW <= max_w / 2:
imgW = max_w / 2
elif imgW > max_w / 2 and imgW <= 2 * max_w / 3:
imgW = 2 * max_w / 3
elif imgW > 2 *max_w / 3 and imgW <= 5 * max_w / 6:
imgW = 5 * max_w / 6
else:
imgW = max_w
imgW = int(imgW)
h, w = img.shape[:2]
ratio = w / float(h)
if math.ceil(imgH * ratio) > imgW:
resized_w = imgW
else:
resized_w = int(math.ceil(imgH * ratio))
resized_image = cv2.resize(img, (resized_w, imgH))
resized_image = resized_image.astype('float32')
resized_image = resized_image.transpose((2, 0, 1)) / 255
resized_image -= 0.5
resized_image /= 0.5
padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
padding_im[:, :, 0:resized_w] = resized_image
return padding_im
def __call__(self, img_list): def __call__(self, img_list):
img_list = copy.deepcopy(img_list) img_list = copy.deepcopy(img_list)
img_num = len(img_list) img_num = len(img_list)
...@@ -69,23 +127,81 @@ class TextClassifier(object): ...@@ -69,23 +127,81 @@ class TextClassifier(object):
indices = np.argsort(np.array(width_list)) indices = np.argsort(np.array(width_list))
cls_res = [['', 0.0]] * img_num cls_res = [['', 0.0]] * img_num
batch_num = self.cls_batch_num if img_num <= 0:
elapse = 0 return cls_res, 0
for beg_img_no in range(0, img_num, batch_num): max_batnum = 24
min_batnum = 8
if os.environ.get("OCR_REC_MAX_BATNUM") is not None:
max_batnum = int(os.environ.get("OCR_REC_MAX_BATNUM"))
if os.environ.get("OCR_REC_MIN_BATNUM") is not None:
min_batnum = int(os.environ.get("OCR_REC_MIN_BATNUM"))
assert max_batnum / min_batnum == int(max_batnum / min_batnum), "max_batnum must be multiple of min_batnum."
img_num_left = img_num
img_no_count = 0
st = time.time()
if img_num_left > max_batnum:
batch_num = max_batnum
batch_num = int(batch_num)
for beg_img_no in range(img_no_count, int(img_num_left / batch_num) * batch_num, batch_num):
end_img_no = beg_img_no + batch_num
norm_img_batch = []
max_wh_ratio = 0
for ino in range(beg_img_no, end_img_no):
h, w = img_list[indices[ino]].shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for ino in range(beg_img_no, end_img_no):
norm_img = self.resize_norm_img_section(img_list[indices[ino]], max_wh_ratio)
norm_img = norm_img[np.newaxis, :]
norm_img_batch.append(norm_img)
end_img_no = min(img_num, beg_img_no + batch_num) norm_img_batch = np.concatenate(norm_img_batch, axis=0)
norm_img_batch = norm_img_batch.copy()
if self.use_onnx:
input_dict = {}
input_dict[self.input_tensor.name] = norm_img_batch
outputs = self.predictor.run(self.output_tensors, input_dict)
prob_out = outputs[0]
else:
self.input_tensor.copy_from_cpu(norm_img_batch)
self.predictor.run()
prob_out = self.output_tensors[0].copy_to_cpu()
self.predictor.try_shrink_memory()
cls_result = self.postprocess_op(prob_out)
for rno in range(len(cls_result)):
label, score = cls_result[rno]
cls_res[indices[beg_img_no + rno]] = [label, score]
if '180' in label and score > self.cls_thresh:
img_list[indices[beg_img_no + rno]] = cv2.rotate(
img_list[indices[beg_img_no + rno]], 1)
img_no_count = int(img_num_left / batch_num) * batch_num
img_num_left = img_num_left - int(img_num_left / batch_num) * batch_num
batch_num = math.ceil(img_num_left / min_batnum) * min_batnum
batch_num = int(batch_num)
Dnum = batch_num - img_num_left
for dno in range(Dnum):
indices = np.append(indices,img_num + dno)
cls_res.append(['', 0.0])
beg_img_no = img_no_count
end_img_no = img_num
norm_img_batch = [] norm_img_batch = []
max_wh_ratio = 0 max_wh_ratio = 0
starttime = time.time()
for ino in range(beg_img_no, end_img_no): for ino in range(beg_img_no, end_img_no):
h, w = img_list[indices[ino]].shape[0:2] h, w = img_list[indices[ino]].shape[0:2]
wh_ratio = w * 1.0 / h wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio) max_wh_ratio = max(max_wh_ratio, wh_ratio)
for ino in range(beg_img_no, end_img_no): for ino in range(beg_img_no, end_img_no):
norm_img = self.resize_norm_img(img_list[indices[ino]]) norm_img = self.resize_norm_img_section(img_list[indices[ino]], max_wh_ratio)
norm_img = norm_img[np.newaxis, :] norm_img = norm_img[np.newaxis, :]
norm_img_batch.append(norm_img) norm_img_batch.append(norm_img)
norm_img_batch = np.concatenate(norm_img_batch) norm_img_batch = np.concatenate(norm_img_batch)
if norm_img_batch.shape[0] != batch_num:
img_tmp = np.zeros((batch_num - norm_img_batch.shape[0], norm_img_batch.shape[1], norm_img_batch.shape[2], norm_img_batch.shape[3]), dtype=np.float32)
norm_img_batch = np.concatenate([norm_img_batch, img_tmp])
norm_img_batch = norm_img_batch.copy() norm_img_batch = norm_img_batch.copy()
if self.use_onnx: if self.use_onnx:
...@@ -99,14 +215,14 @@ class TextClassifier(object): ...@@ -99,14 +215,14 @@ class TextClassifier(object):
prob_out = self.output_tensors[0].copy_to_cpu() prob_out = self.output_tensors[0].copy_to_cpu()
self.predictor.try_shrink_memory() self.predictor.try_shrink_memory()
cls_result = self.postprocess_op(prob_out) cls_result = self.postprocess_op(prob_out)
elapse += time.time() - starttime
for rno in range(len(cls_result)): for rno in range(len(cls_result)):
label, score = cls_result[rno] label, score = cls_result[rno]
cls_res[indices[beg_img_no + rno]] = [label, score] cls_res[indices[beg_img_no + rno]] = [label, score]
if '180' in label and score > self.cls_thresh: if '180' in label and score > self.cls_thresh and (beg_img_no + rno) < img_num:
img_list[indices[beg_img_no + rno]] = cv2.rotate( img_list[indices[beg_img_no + rno]] = cv2.rotate(
img_list[indices[beg_img_no + rno]], 1) img_list[indices[beg_img_no + rno]], 1)
return img_list, cls_res, elapse
return img_list, cls_res, time.time() - st
def main(args): def main(args):
......
...@@ -120,6 +120,11 @@ class TextDetector(object): ...@@ -120,6 +120,11 @@ class TextDetector(object):
# print(img.shape) # print(img.shape)
img = img.copy() img = img.copy()
if self.use_onnx:
input_dict = {}
input_dict[self.input_tensor.name] = img
outputs = self.predictor.run(self.output_tensors, input_dict)
else:
self.input_tensor.copy_from_cpu(img) self.input_tensor.copy_from_cpu(img)
self.predictor.run() self.predictor.run()
paddle.device.cuda.synchronize() paddle.device.cuda.synchronize()
......
...@@ -33,6 +33,7 @@ class TextRecognizer(object): ...@@ -33,6 +33,7 @@ class TextRecognizer(object):
self.postprocess_op = build_post_process(postprocess_params) self.postprocess_op = build_post_process(postprocess_params)
self.predictor, self.input_tensor, self.output_tensors, self.config = \ self.predictor, self.input_tensor, self.output_tensors, self.config = \
utility.create_predictor(args, 'rec', logger) utility.create_predictor(args, 'rec', logger)
self.use_onnx = args.use_onnx
def resize_norm_img_section(self, img, max_wh_ratio): def resize_norm_img_section(self, img, max_wh_ratio):
# print("rec resize for section") # print("rec resize for section")
...@@ -133,6 +134,13 @@ class TextRecognizer(object): ...@@ -133,6 +134,13 @@ class TextRecognizer(object):
norm_img_batch = np.concatenate(norm_img_batch, axis=0) norm_img_batch = np.concatenate(norm_img_batch, axis=0)
norm_img_batch = norm_img_batch.copy() norm_img_batch = norm_img_batch.copy()
if self.use_onnx:
input_dict = {}
input_dict[self.input_tensor.name] = norm_img_batch
outputs = self.predictor.run(self.output_tensors,
input_dict)
preds = outputs[0]
else:
self.input_tensor.copy_from_cpu(norm_img_batch) self.input_tensor.copy_from_cpu(norm_img_batch)
self.predictor.run() self.predictor.run()
...@@ -176,6 +184,13 @@ class TextRecognizer(object): ...@@ -176,6 +184,13 @@ class TextRecognizer(object):
norm_img_batch = np.concatenate([norm_img_batch, img_tmp]) norm_img_batch = np.concatenate([norm_img_batch, img_tmp])
norm_img_batch = norm_img_batch.copy() norm_img_batch = norm_img_batch.copy()
if self.use_onnx:
input_dict = {}
input_dict[self.input_tensor.name] = norm_img_batch
outputs = self.predictor.run(self.output_tensors,
input_dict)
preds = outputs[0]
else:
self.input_tensor.copy_from_cpu(norm_img_batch) self.input_tensor.copy_from_cpu(norm_img_batch)
self.predictor.run() self.predictor.run()
......
...@@ -133,6 +133,7 @@ def main(args): ...@@ -133,6 +133,7 @@ def main(args):
img_rec_list = [] img_rec_list = []
for i in range(min_batnum * (bn + 1)): for i in range(min_batnum * (bn + 1)):
img_rec_list.append(img_warm_rec) img_rec_list.append(img_warm_rec)
cls_results = text_sys.text_classifier(img_rec_list)
rec_results = text_sys.text_recognizer(img_rec_list) rec_results = text_sys.text_recognizer(img_rec_list)
elapsewarm = time.time() - startwarm elapsewarm = time.time() - startwarm
logger.debug("warmup time:{}".format(elapsewarm)) logger.debug("warmup time:{}".format(elapsewarm))
......
...@@ -107,6 +107,16 @@ def create_predictor(args, mode, logger): ...@@ -107,6 +107,16 @@ def create_predictor(args, mode, logger):
logger.info("not find {} model file path {}".format(mode, model_dir)) logger.info("not find {} model file path {}".format(mode, model_dir))
sys.exit(0) sys.exit(0)
if args.use_onnx:
import onnxruntime as ort
model_file_path = model_dir
if not os.path.exists(model_file_path):
raise ValueError("not find model file path {}".format(
model_file_path))
sess = ort.InferenceSession(model_file_path, providers=[('ROCMExecutionProvider', {'device_id': '4'}),'CPUExecutionProvider'])
return sess, sess.get_inputs()[0], None, None
else:
model_file_path = model_dir + "/inference.pdmodel" model_file_path = model_dir + "/inference.pdmodel"
params_file_path = model_dir + "/inference.pdiparams" params_file_path = model_dir + "/inference.pdiparams"
if not os.path.exists(model_file_path): if not os.path.exists(model_file_path):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment