Commit b3da71f5 authored by chenxj

update predict_system.py predict_det.py predict_cls.py predict_rec.py utility.py README.md

parent 3f11da7d
...@@ -62,6 +62,10 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/eval.py -c configs/
```
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --rec_image_shape=3,48,320 --warmup=1
```
### Inference (ONNX Runtime)
```
python3 tools/infer/predict_system.py --image_dir="./doc/imgs/" --det_model_dir="./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det.onnx" --cls_model_dir="./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx" --rec_model_dir="./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec.onnx" --use_onnx=true --use_angle_cls=true --rec_image_shape=3,48,320 --warmup=1
```
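The `.onnx` files referenced above are typically exported from the Paddle inference models (e.g. with paddle2onnx). As a quick sanity check that an exported detector loads and exposes the expected input, a minimal sketch (assumes onnxruntime is installed; the CPU provider is used here so it runs anywhere, and the printed shape is only an example):

```
import onnxruntime as ort

# Load the exported detection model and inspect its input signature before
# wiring it into predict_system.py via --use_onnx=true.
sess = ort.InferenceSession("./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det.onnx",
                            providers=["CPUExecutionProvider"])
print([(i.name, i.shape) for i in sess.get_inputs()])  # e.g. [('x', [-1, 3, -1, -1])]
print(sess.get_providers())  # execution providers actually selected
```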
## Performance and Accuracy Data
Detection model test
......
...@@ -57,6 +57,64 @@ class TextClassifier(object):
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im

    def resize_norm_img_section(self, img, max_wh_ratio):
        imgC, imgH, imgW = self.cls_image_shape
        assert imgC == img.shape[2]
        # OCR_REC_PRECISION selects how coarsely the target width is bucketed:
        # '0' always pads to max_w, '1' uses halves of max_w, '2' quarters,
        # anything else sixths. Fewer buckets means fewer distinct input
        # shapes for the predictor, at the cost of more padding.
        rec_precision_level = os.environ.get("OCR_REC_PRECISION")
        max_w = imgH * 48  # widest supported input (2304 for imgH=48)
        if rec_precision_level == '0':
            imgW = max_w
        elif rec_precision_level == '1':
            imgW = int(imgH * max_wh_ratio)
            if imgW <= max_w / 2:
                imgW = max_w / 2
            else:
                imgW = max_w
        elif rec_precision_level == '2':
            imgW = int(imgH * max_wh_ratio)
            if imgW <= max_w / 4:
                imgW = max_w / 4
            elif imgW <= max_w / 2:
                imgW = max_w / 2
            elif imgW <= 3 * max_w / 4:
                imgW = 3 * max_w / 4
            else:
                imgW = max_w
        else:
            imgW = int(imgH * max_wh_ratio)
            if imgW <= max_w / 6:
                imgW = max_w / 6
            elif imgW <= max_w / 3:
                imgW = max_w / 3
            elif imgW <= max_w / 2:
                imgW = max_w / 2
            elif imgW <= 2 * max_w / 3:
                imgW = 2 * max_w / 3
            elif imgW <= 5 * max_w / 6:
                imgW = 5 * max_w / 6
            else:
                imgW = max_w
        imgW = int(imgW)
        # Resize keeping the aspect ratio, then right-pad to the bucketed width.
        h, w = img.shape[:2]
        ratio = w / float(h)
        if math.ceil(imgH * ratio) > imgW:
            resized_w = imgW
        else:
            resized_w = int(math.ceil(imgH * ratio))
        resized_image = cv2.resize(img, (resized_w, imgH))
        resized_image = resized_image.astype('float32')
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im
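The precision-level branching above is a width-bucketing rule: the natural width `imgH * max_wh_ratio` is rounded up to the next multiple of `max_w / n` (n = 1, 2, 4, or 6 for levels '0', '1', '2', and the default), capped at `max_w`. A standalone restatement for illustration; `bucket_width` is our name, not part of the source:

```
import math

def bucket_width(imgH, max_wh_ratio, level):
    """Equivalent restatement of the OCR_REC_PRECISION bucketing above."""
    max_w = imgH * 48
    n = {'0': 1, '1': 2, '2': 4}.get(level, 6)  # number of width buckets
    if n == 1:
        return max_w
    step = max_w / n
    natural = int(imgH * max_wh_ratio)
    return int(min(max_w, math.ceil(max(natural, 1) / step) * step))

# For imgH=48 (max_w=2304) and a text line with aspect ratio 10 (width 480):
for level in ['0', '1', '2', '3']:
    print(level, bucket_width(48, 10.0, level))  # 2304, 1152, 576, 768
```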

    def __call__(self, img_list):
        img_list = copy.deepcopy(img_list)
...@@ -69,44 +127,102 @@ class TextClassifier(object):
        indices = np.argsort(np.array(width_list))
        cls_res = [['', 0.0]] * img_num
        if img_num <= 0:
            return cls_res, 0
        # Batch sizing: images are consumed in full batches of max_batnum and
        # the remainder is padded up to a multiple of min_batnum, so the
        # predictor only ever sees a small, fixed set of batch shapes.
        max_batnum = 24
        min_batnum = 8
        if os.environ.get("OCR_REC_MAX_BATNUM") is not None:
            max_batnum = int(os.environ.get("OCR_REC_MAX_BATNUM"))
        if os.environ.get("OCR_REC_MIN_BATNUM") is not None:
            min_batnum = int(os.environ.get("OCR_REC_MIN_BATNUM"))
        assert max_batnum % min_batnum == 0, \
            "max_batnum must be a multiple of min_batnum."
        img_num_left = img_num
        img_no_count = 0
        st = time.time()
        if img_num_left > max_batnum:
            batch_num = int(max_batnum)
            for beg_img_no in range(img_no_count,
                                    img_num_left // batch_num * batch_num,
                                    batch_num):
                end_img_no = beg_img_no + batch_num
                norm_img_batch = []
                max_wh_ratio = 0
                for ino in range(beg_img_no, end_img_no):
                    h, w = img_list[indices[ino]].shape[0:2]
                    wh_ratio = w * 1.0 / h
                    max_wh_ratio = max(max_wh_ratio, wh_ratio)
                for ino in range(beg_img_no, end_img_no):
                    norm_img = self.resize_norm_img_section(
                        img_list[indices[ino]], max_wh_ratio)
                    norm_img = norm_img[np.newaxis, :]
                    norm_img_batch.append(norm_img)
                norm_img_batch = np.concatenate(norm_img_batch, axis=0)
                norm_img_batch = norm_img_batch.copy()
                if self.use_onnx:
                    input_dict = {}
                    input_dict[self.input_tensor.name] = norm_img_batch
                    outputs = self.predictor.run(self.output_tensors,
                                                 input_dict)
                    prob_out = outputs[0]
                else:
                    self.input_tensor.copy_from_cpu(norm_img_batch)
                    self.predictor.run()
                    prob_out = self.output_tensors[0].copy_to_cpu()
                    self.predictor.try_shrink_memory()
                cls_result = self.postprocess_op(prob_out)
                for rno in range(len(cls_result)):
                    label, score = cls_result[rno]
                    cls_res[indices[beg_img_no + rno]] = [label, score]
                    if '180' in label and score > self.cls_thresh:
                        img_list[indices[beg_img_no + rno]] = cv2.rotate(
                            img_list[indices[beg_img_no + rno]], 1)
            img_no_count = img_num_left // batch_num * batch_num
            img_num_left = img_num_left - img_no_count
        if img_num_left > 0:  # guard: np.concatenate below fails on an empty tail
            # Pad the tail batch up to the next multiple of min_batnum with
            # dummy entries so its shape stays within the fixed set.
            batch_num = int(math.ceil(img_num_left / min_batnum) * min_batnum)
            Dnum = batch_num - img_num_left
            for dno in range(Dnum):
                indices = np.append(indices, img_num + dno)
                cls_res.append(['', 0.0])
            beg_img_no = img_no_count
            end_img_no = img_num
            norm_img_batch = []
            max_wh_ratio = 0
            for ino in range(beg_img_no, end_img_no):
                h, w = img_list[indices[ino]].shape[0:2]
                wh_ratio = w * 1.0 / h
                max_wh_ratio = max(max_wh_ratio, wh_ratio)
            for ino in range(beg_img_no, end_img_no):
                norm_img = self.resize_norm_img_section(
                    img_list[indices[ino]], max_wh_ratio)
                norm_img = norm_img[np.newaxis, :]
                norm_img_batch.append(norm_img)
            norm_img_batch = np.concatenate(norm_img_batch)
            if norm_img_batch.shape[0] != batch_num:
                img_tmp = np.zeros(
                    (batch_num - norm_img_batch.shape[0],
                     norm_img_batch.shape[1], norm_img_batch.shape[2],
                     norm_img_batch.shape[3]),
                    dtype=np.float32)
                norm_img_batch = np.concatenate([norm_img_batch, img_tmp])
            norm_img_batch = norm_img_batch.copy()
            if self.use_onnx:
                input_dict = {}
                input_dict[self.input_tensor.name] = norm_img_batch
                outputs = self.predictor.run(self.output_tensors, input_dict)
                prob_out = outputs[0]
            else:
                self.input_tensor.copy_from_cpu(norm_img_batch)
                self.predictor.run()
                prob_out = self.output_tensors[0].copy_to_cpu()
                self.predictor.try_shrink_memory()
            cls_result = self.postprocess_op(prob_out)
            for rno in range(len(cls_result)):
                label, score = cls_result[rno]
                cls_res[indices[beg_img_no + rno]] = [label, score]
                # dummy entries (index >= img_num) are never rotated
                if '180' in label and score > self.cls_thresh \
                        and (beg_img_no + rno) < img_num:
                    img_list[indices[beg_img_no + rno]] = cv2.rotate(
                        img_list[indices[beg_img_no + rno]], 1)
        return img_list, cls_res, time.time() - st
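Put differently, `__call__` now consumes images in full batches of `max_batnum` and rounds the remainder up to a multiple of `min_batnum`, padding with dummy entries. A small sketch of just that arithmetic (`plan_batches` is illustrative, not part of the source):

```
import math

def plan_batches(img_num, max_batnum=24, min_batnum=8):
    """Return (list of batch sizes, number of padded dummy images)."""
    if img_num > max_batnum:
        full, left = divmod(img_num, max_batnum)
    else:
        full, left = 0, img_num
    tail = math.ceil(left / min_batnum) * min_batnum if left else 0
    return [max_batnum] * full + ([tail] if tail else []), tail - left

print(plan_batches(50))  # ([24, 24, 8], 6): two full batches, 2 images padded to 8
print(plan_batches(20))  # ([24], 4): one batch of 24 with 4 dummy images
```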

def main(args):
......
...@@ -120,15 +120,20 @@ class TextDetector(object):
        # print(img.shape)
        img = img.copy()
        if self.use_onnx:
            input_dict = {}
            input_dict[self.input_tensor.name] = img
            outputs = self.predictor.run(self.output_tensors, input_dict)
        else:
            self.input_tensor.copy_from_cpu(img)
            self.predictor.run()
            paddle.device.cuda.synchronize()
            outputs = []
            for output_tensor in self.output_tensors:
                output = output_tensor.copy_to_cpu()
                outputs.append(output)
            if self.args.benchmark:
                self.autolog.times.stamp()
        preds = {}
        if self.det_algorithm in ['DB', 'PSE']:
......
...@@ -33,6 +33,7 @@ class TextRecognizer(object):
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.output_tensors, self.config = \
            utility.create_predictor(args, 'rec', logger)
        self.use_onnx = args.use_onnx

    def resize_norm_img_section(self, img, max_wh_ratio):
        # print("rec resize for section")
...@@ -133,17 +134,24 @@ class TextRecognizer(object):
            norm_img_batch = np.concatenate(norm_img_batch, axis=0)
            norm_img_batch = norm_img_batch.copy()
            if self.use_onnx:
                input_dict = {}
                input_dict[self.input_tensor.name] = norm_img_batch
                outputs = self.predictor.run(self.output_tensors,
                                             input_dict)
                preds = outputs[0]
            else:
                self.input_tensor.copy_from_cpu(norm_img_batch)
                self.predictor.run()
                outputs = []
                for output_tensor in self.output_tensors:
                    output = output_tensor.copy_to_cpu()
                    outputs.append(output)
                if len(outputs) != 1:
                    preds = outputs
                else:
                    preds = outputs[0]
            rec_result = self.postprocess_op(preds)
            for rno in range(len(rec_result)):
                rec_res[indices[beg_img_no + rno]] = rec_result[rno]
...@@ -176,17 +184,24 @@ class TextRecognizer(object):
                norm_img_batch = np.concatenate([norm_img_batch, img_tmp])
            norm_img_batch = norm_img_batch.copy()
            if self.use_onnx:
                input_dict = {}
                input_dict[self.input_tensor.name] = norm_img_batch
                outputs = self.predictor.run(self.output_tensors,
                                             input_dict)
                preds = outputs[0]
            else:
                self.input_tensor.copy_from_cpu(norm_img_batch)
                self.predictor.run()
                outputs = []
                for output_tensor in self.output_tensors:
                    output = output_tensor.copy_to_cpu()
                    outputs.append(output)
                if len(outputs) != 1:
                    preds = outputs
                else:
                    preds = outputs[0]
            rec_result = self.postprocess_op(preds)
            for rno in range(len(rec_result)):
                rec_res[indices[beg_img_no + rno]] = rec_result[rno]
......
...@@ -133,6 +133,7 @@ def main(args):
        img_rec_list = []
        for i in range(min_batnum * (bn + 1)):
            img_rec_list.append(img_warm_rec)
        # warm up the angle classifier as well as the recognizer
        cls_results = text_sys.text_classifier(img_rec_list)
        rec_results = text_sys.text_recognizer(img_rec_list)
        elapsewarm = time.time() - startwarm
        logger.debug("warmup time:{}".format(elapsewarm))
......
...@@ -107,150 +107,160 @@ def create_predictor(args, mode, logger):
        logger.info("not find {} model file path {}".format(mode, model_dir))
        sys.exit(0)
    if args.use_onnx:
        import onnxruntime as ort
        # For ONNX inference, model_dir points directly at the .onnx file.
        model_file_path = model_dir
        if not os.path.exists(model_file_path):
            raise ValueError("not find model file path {}".format(
                model_file_path))
        # NOTE: the ROCm device id is hard-coded to '4'; CPU is the fallback.
        sess = ort.InferenceSession(
            model_file_path,
            providers=[('ROCMExecutionProvider', {'device_id': '4'}),
                       'CPUExecutionProvider'])
        return sess, sess.get_inputs()[0], None, None
    else:
        model_file_path = model_dir + "/inference.pdmodel"
        params_file_path = model_dir + "/inference.pdiparams"
        if not os.path.exists(model_file_path):
            raise ValueError("not find model file path {}".format(
                model_file_path))
        if not os.path.exists(params_file_path):
            raise ValueError("not find params file path {}".format(
                params_file_path))
        config = inference.Config(model_file_path, params_file_path)

        if hasattr(args, 'precision'):
            if args.precision == "fp16" and args.use_tensorrt:
                precision = inference.PrecisionType.Half
                print("fp16 set success!")
            elif args.precision == "int8":
                precision = inference.PrecisionType.Int8
            else:
                precision = inference.PrecisionType.Float32
        else:
            precision = inference.PrecisionType.Float32

        if args.use_gpu:
            gpu_id = get_infer_gpuid()
            if gpu_id is None:
                logger.warning(
                    "GPU is not found in current device by nvidia-smi. Please check your device or ignore it if run on jetson."
                )
            config.enable_use_gpu(args.gpu_mem, 0)
            use_dynamic_shape = True
            if mode == "det":
                min_input_shape = {
                    "x": [1, 3, 50, 50],
                    "conv2d_92.tmp_0": [1, 120, 20, 20],
                    "conv2d_91.tmp_0": [1, 24, 10, 10],
                    "conv2d_59.tmp_0": [1, 96, 20, 20],
                    "nearest_interp_v2_1.tmp_0": [1, 256, 10, 10],
                    "nearest_interp_v2_2.tmp_0": [1, 256, 20, 20],
                    "conv2d_124.tmp_0": [1, 256, 20, 20],
                    "nearest_interp_v2_3.tmp_0": [1, 64, 20, 20],
                    "nearest_interp_v2_4.tmp_0": [1, 64, 20, 20],
                    "nearest_interp_v2_5.tmp_0": [1, 64, 20, 20],
                    "elementwise_add_7": [1, 56, 2, 2],
                    "nearest_interp_v2_0.tmp_0": [1, 256, 2, 2]
                }
                max_input_shape = {
                    "x": [1, 3, 1536, 1536],
                    "conv2d_92.tmp_0": [1, 120, 400, 400],
                    "conv2d_91.tmp_0": [1, 24, 200, 200],
                    "conv2d_59.tmp_0": [1, 96, 400, 400],
                    "nearest_interp_v2_1.tmp_0": [1, 256, 200, 200],
                    "conv2d_124.tmp_0": [1, 256, 400, 400],
                    "nearest_interp_v2_2.tmp_0": [1, 256, 400, 400],
                    "nearest_interp_v2_3.tmp_0": [1, 64, 400, 400],
                    "nearest_interp_v2_4.tmp_0": [1, 64, 400, 400],
                    "nearest_interp_v2_5.tmp_0": [1, 64, 400, 400],
                    "elementwise_add_7": [1, 56, 400, 400],
                    "nearest_interp_v2_0.tmp_0": [1, 256, 400, 400]
                }
                opt_input_shape = {
                    "x": [1, 3, 640, 640],
                    "conv2d_92.tmp_0": [1, 120, 160, 160],
                    "conv2d_91.tmp_0": [1, 24, 80, 80],
                    "conv2d_59.tmp_0": [1, 96, 160, 160],
                    "nearest_interp_v2_1.tmp_0": [1, 256, 80, 80],
                    "nearest_interp_v2_2.tmp_0": [1, 256, 160, 160],
                    "conv2d_124.tmp_0": [1, 256, 160, 160],
                    "nearest_interp_v2_3.tmp_0": [1, 64, 160, 160],
                    "nearest_interp_v2_4.tmp_0": [1, 64, 160, 160],
                    "nearest_interp_v2_5.tmp_0": [1, 64, 160, 160],
                    "elementwise_add_7": [1, 56, 40, 40],
                    "nearest_interp_v2_0.tmp_0": [1, 256, 40, 40]
                }
                min_pact_shape = {
                    "nearest_interp_v2_26.tmp_0": [1, 256, 20, 20],
                    "nearest_interp_v2_27.tmp_0": [1, 64, 20, 20],
                    "nearest_interp_v2_28.tmp_0": [1, 64, 20, 20],
                    "nearest_interp_v2_29.tmp_0": [1, 64, 20, 20]
                }
                max_pact_shape = {
                    "nearest_interp_v2_26.tmp_0": [1, 256, 400, 400],
                    "nearest_interp_v2_27.tmp_0": [1, 64, 400, 400],
                    "nearest_interp_v2_28.tmp_0": [1, 64, 400, 400],
                    "nearest_interp_v2_29.tmp_0": [1, 64, 400, 400]
                }
                opt_pact_shape = {
                    "nearest_interp_v2_26.tmp_0": [1, 256, 160, 160],
                    "nearest_interp_v2_27.tmp_0": [1, 64, 160, 160],
                    "nearest_interp_v2_28.tmp_0": [1, 64, 160, 160],
                    "nearest_interp_v2_29.tmp_0": [1, 64, 160, 160]
                }
                min_input_shape.update(min_pact_shape)
                max_input_shape.update(max_pact_shape)
                opt_input_shape.update(opt_pact_shape)
            elif mode == "rec":
                if args.rec_algorithm not in ["CRNN", "SVTR_LCNet"]:
                    use_dynamic_shape = False
                imgH = int(args.rec_image_shape.split(',')[-2])
                min_input_shape = {"x": [1, 3, imgH, 10]}
                max_input_shape = {"x": [args.rec_batch_num, 3, imgH, 2304]}
                opt_input_shape = {"x": [args.rec_batch_num, 3, imgH, 320]}
                config.exp_disable_tensorrt_ops(["transpose2"])
            elif mode == "cls":
                min_input_shape = {"x": [1, 3, 48, 10]}
                max_input_shape = {"x": [args.rec_batch_num, 3, 48, 1024]}
                opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]}
            else:
                use_dynamic_shape = False
            if use_dynamic_shape:
                config.set_trt_dynamic_shape_info(
                    min_input_shape, max_input_shape, opt_input_shape)
        elif args.use_xpu:
            config.enable_xpu(10 * 1024 * 1024)
        else:
            config.disable_gpu()
            if hasattr(args, "cpu_threads"):
                config.set_cpu_math_library_num_threads(args.cpu_threads)
            else:
                # default cpu threads as 10
                config.set_cpu_math_library_num_threads(10)
            if args.enable_mkldnn:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
                if args.precision == "fp16":
                    config.enable_mkldnn_bfloat16()
        # enable memory optim
        config.enable_memory_optim()
        config.disable_glog_info()
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
        config.delete_pass("matmul_transpose_reshape_fuse_pass")
        if mode == 'table':
            config.delete_pass("fc_fuse_pass")  # not supported for table
        config.switch_use_feed_fetch_ops(False)
        config.switch_ir_optim(True)

        # create predictor
        predictor = inference.create_predictor(config)
        input_names = predictor.get_input_names()
        for name in input_names:
            input_tensor = predictor.get_input_handle(name)
        output_tensors = get_output_tensors(args, mode, predictor)
        return predictor, input_tensor, output_tensors, config
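For reference, callers consume the two return conventions in the pattern the predict_det/predict_rec hunks above follow; a condensed sketch (this wrapper is illustrative, not part of the commit):

```
def run_inference(predictor, input_tensor, output_tensors, img, use_onnx):
    """Dispatch one forward pass through either backend."""
    if use_onnx:
        # onnxruntime: input_tensor is a NodeArg, so feed a dict keyed by its
        # name; output_tensors is None here, which makes ORT return all outputs.
        return predictor.run(output_tensors, {input_tensor.name: img})
    # Paddle Inference: copy input to device, run, copy each output back.
    input_tensor.copy_from_cpu(img)
    predictor.run()
    return [t.copy_to_cpu() for t in output_tensors]
```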

def get_output_tensors(args, mode, predictor):
......