Commit 03bb378f authored by LDOUBLEV

fix TRT8 core bug

parents a2a12fe4 2e9abcb9
#!/bin/bash
source test_tipc/common_func.sh

FILENAME=$1
dataline=$(awk 'NR==1, NR==16{print}' $FILENAME)

# parser params
IFS=$'\n'
lines=(${dataline})

# parser cpp inference model
model_name=$(func_parser_value "${lines[1]}")
use_opencv=$(func_parser_value "${lines[2]}")
cpp_infer_model_dir_list=$(func_parser_value "${lines[3]}")
cpp_infer_is_quant=$(func_parser_value "${lines[4]}")
# parser cpp inference
inference_cmd=$(func_parser_value "${lines[5]}")
cpp_use_gpu_key=$(func_parser_key "${lines[6]}")
cpp_use_gpu_list=$(func_parser_value "${lines[6]}")
cpp_use_mkldnn_key=$(func_parser_key "${lines[7]}")
cpp_use_mkldnn_list=$(func_parser_value "${lines[7]}")
cpp_cpu_threads_key=$(func_parser_key "${lines[8]}")
cpp_cpu_threads_list=$(func_parser_value "${lines[8]}")
cpp_batch_size_key=$(func_parser_key "${lines[9]}")
cpp_batch_size_list=$(func_parser_value "${lines[9]}")
cpp_use_trt_key=$(func_parser_key "${lines[10]}")
cpp_use_trt_list=$(func_parser_value "${lines[10]}")
cpp_precision_key=$(func_parser_key "${lines[11]}")
cpp_precision_list=$(func_parser_value "${lines[11]}")
cpp_infer_model_key=$(func_parser_key "${lines[12]}")
cpp_image_dir_key=$(func_parser_key "${lines[13]}")
cpp_infer_img_dir=$(func_parser_value "${lines[13]}")
cpp_infer_key1=$(func_parser_key "${lines[14]}")
cpp_infer_value1=$(func_parser_value "${lines[14]}")
cpp_benchmark_key=$(func_parser_key "${lines[15]}")
cpp_benchmark_value=$(func_parser_value "${lines[15]}")

LOG_PATH="./test_tipc/output"
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results_cpp.log"
...
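These scripts lean on parser helpers from test_tipc/common_func.sh, which this commit does not show. A rough sketch of the behavior the positional parsing relies on, assuming colon-separated key:value config lines (an inference from usage, not the verbatim upstream code):

function func_parser_key(){
    # text before the first ":" is the key
    strs=$1
    IFS=":"
    array=(${strs})
    echo ${array[0]}
}
function func_parser_value(){
    # text after the ":" is the value; "|"-separated values act as lists
    strs=$1
    IFS=":"
    array=(${strs})
    echo ${array[1]}
}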
#!/bin/bash
source test_tipc/common_func.sh
source test_tipc/test_train_inference_python.sh
FILENAME=$1
# MODE be one of ['whole_infer']
MODE=$2
dataline=$(awk 'NR==1, NR==17{print}' $FILENAME)
# parser params
IFS=$'\n'
lines=(${dataline})
model_name=$(func_parser_value "${lines[1]}")
python=$(func_parser_value "${lines[2]}")
infer_model_dir_list=$(func_parser_value "${lines[3]}")
infer_export_list=$(func_parser_value "${lines[4]}")
infer_is_quant=$(func_parser_value "${lines[5]}")
# parser inference
inference_py=$(func_parser_value "${lines[6]}")
use_gpu_key=$(func_parser_key "${lines[7]}")
use_gpu_list=$(func_parser_value "${lines[7]}")
use_mkldnn_key=$(func_parser_key "${lines[8]}")
use_mkldnn_list=$(func_parser_value "${lines[8]}")
cpu_threads_key=$(func_parser_key "${lines[9]}")
cpu_threads_list=$(func_parser_value "${lines[9]}")
batch_size_key=$(func_parser_key "${lines[10]}")
batch_size_list=$(func_parser_value "${lines[10]}")
use_trt_key=$(func_parser_key "${lines[11]}")
use_trt_list=$(func_parser_value "${lines[11]}")
precision_key=$(func_parser_key "${lines[12]}")
precision_list=$(func_parser_value "${lines[12]}")
infer_model_key=$(func_parser_key "${lines[13]}")
image_dir_key=$(func_parser_key "${lines[14]}")
infer_img_dir=$(func_parser_value "${lines[14]}")
save_log_key=$(func_parser_key "${lines[15]}")
benchmark_key=$(func_parser_key "${lines[16]}")
benchmark_value=$(func_parser_value "${lines[16]}")
infer_key1=$(func_parser_key "${lines[17]}")
infer_value1=$(func_parser_value "${lines[17]}")
LOG_PATH="./test_tipc/output"
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results_python.log"
if [ ${MODE} = "whole_infer" ]; then
    GPUID=$3
    if [ ${#GPUID} -le 0 ];then
        env=" "
    else
        env="export CUDA_VISIBLE_DEVICES=${GPUID}"
    fi
    # set CUDA_VISIBLE_DEVICES
    eval $env
    export Count=0
    IFS="|"
    infer_run_exports=(${infer_export_list})
    infer_quant_flag=(${infer_is_quant})
    for infer_model in ${infer_model_dir_list[*]}; do
        # run export
        if [ ${infer_run_exports[Count]} != "null" ];then
            save_infer_dir=$(dirname $infer_model)
            set_export_weight=$(func_set_params "${export_weight}" "${infer_model}")
            set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}")
            export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key}"
            echo ${infer_run_exports[Count]}
            echo $export_cmd
            eval $export_cmd
            status_export=$?
            status_check $status_export "${export_cmd}" "${status_log}"
        else
            save_infer_dir=${infer_model}
        fi
        # run inference
        is_quant=${infer_quant_flag[Count]}
        if [ ${MODE} = "klquant_infer" ]; then
            is_quant="True"
        fi
        func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${LOG_PATH}" "${infer_img_dir}" ${is_quant}
        Count=$(($Count + 1))
    done
fi
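For reference, a plausible invocation of this script; the config path is a placeholder, since the per-model config files are not shown in this diff:

# $1 = config file, $2 = MODE ('whole_infer' here), $3 = optional GPU id
bash test_tipc/test_inference_python.sh test_tipc/configs/xxx/model_infer_python.txt whole_infer 0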
#!/bin/bash
source ./common_func.sh
export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
FILENAME=$1
dataline=$(cat $FILENAME)
# parser params
IFS=$'\n'
lines=(${dataline})
# parser lite inference
inference_cmd=$(func_parser_value "${lines[1]}")
runtime_device=$(func_parser_value "${lines[2]}")
det_model_list=$(func_parser_value "${lines[3]}")
rec_model_list=$(func_parser_value "${lines[4]}")
cls_model_list=$(func_parser_value "${lines[5]}")
cpu_threads_list=$(func_parser_value "${lines[6]}")
det_batch_size_list=$(func_parser_value "${lines[7]}")
rec_batch_size_list=$(func_parser_value "${lines[8]}")
infer_img_dir_list=$(func_parser_value "${lines[9]}")
config_dir=$(func_parser_value "${lines[10]}")
rec_dict_dir=$(func_parser_value "${lines[11]}")
benchmark_value=$(func_parser_value "${lines[12]}")
if [[ $inference_cmd =~ "det" ]]; then
    lite_model_list=${det_model_list}
elif [[ $inference_cmd =~ "rec" ]]; then
    lite_model_list=(${rec_model_list[*]} ${cls_model_list[*]})
elif [[ $inference_cmd =~ "system" ]]; then
    lite_model_list=(${det_model_list[*]} ${rec_model_list[*]} ${cls_model_list[*]})
else
    echo "inference_cmd is wrong, please check."
    exit 1
fi
LOG_PATH="./output"
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results.log"
function func_test_det(){
    IFS='|'
    _script=$1
    _det_model=$2
    _log_path=$3
    _img_dir=$4
    _config=$5
    if [[ $_det_model =~ "slim" ]]; then
        precision="INT8"
    else
        precision="FP32"
    fi

    # lite inference
    for num_threads in ${cpu_threads_list[*]}; do
        for det_batchsize in ${det_batch_size_list[*]}; do
            _save_log_path="${_log_path}/lite_${_det_model}_runtime_device_${runtime_device}_precision_${precision}_det_batchsize_${det_batchsize}_threads_${num_threads}.log"
            command="${_script} ${_det_model} ${runtime_device} ${precision} ${num_threads} ${det_batchsize} ${_img_dir} ${_config} ${benchmark_value} > ${_save_log_path} 2>&1"
            eval ${command}
            status_check $? "${command}" "${status_log}"
        done
    done
}
function func_test_rec(){
    IFS='|'
    _script=$1
    _rec_model=$2
    _cls_model=$3
    _log_path=$4
    _img_dir=$5
    _config=$6
    _rec_dict_dir=$7
    if [[ $_rec_model =~ "slim" ]]; then
        _precision="INT8"
    else
        _precision="FP32"
    fi

    # lite inference
    for num_threads in ${cpu_threads_list[*]}; do
        for rec_batchsize in ${rec_batch_size_list[*]}; do
            _save_log_path="${_log_path}/lite_${_rec_model}_${_cls_model}_runtime_device_${runtime_device}_precision_${_precision}_rec_batchsize_${rec_batchsize}_threads_${num_threads}.log"
            command="${_script} ${_rec_model} ${_cls_model} ${runtime_device} ${_precision} ${num_threads} ${rec_batchsize} ${_img_dir} ${_config} ${_rec_dict_dir} ${benchmark_value} > ${_save_log_path} 2>&1"
            eval ${command}
            status_check $? "${command}" "${status_log}"
        done
    done
}
function func_test_system(){
    IFS='|'
    _script=$1
    _det_model=$2
    _rec_model=$3
    _cls_model=$4
    _log_path=$5
    _img_dir=$6
    _config=$7
    _rec_dict_dir=$8
    if [[ $_det_model =~ "slim" ]]; then
        _precision="INT8"
    else
        _precision="FP32"
    fi

    # lite inference
    for num_threads in ${cpu_threads_list[*]}; do
        for det_batchsize in ${det_batch_size_list[*]}; do
            for rec_batchsize in ${rec_batch_size_list[*]}; do
                _save_log_path="${_log_path}/lite_${_det_model}_${_rec_model}_${_cls_model}_runtime_device_${runtime_device}_precision_${_precision}_det_batchsize_${det_batchsize}_rec_batchsize_${rec_batchsize}_threads_${num_threads}.log"
                command="${_script} ${_det_model} ${_rec_model} ${_cls_model} ${runtime_device} ${_precision} ${num_threads} ${det_batchsize} ${_img_dir} ${_config} ${_rec_dict_dir} ${benchmark_value} > ${_save_log_path} 2>&1"
                eval ${command}
                status_check $? "${command}" "${status_log}"
            done
        done
    done
}
echo "################### run test ###################"
if [[ $inference_cmd =~ "det" ]]; then
IFS="|"
det_model_list=(${det_model_list[*]})
for i in {0..1}; do
#run lite inference
for img_dir in ${infer_img_dir_list[*]}; do
func_test_det "${inference_cmd}" "${det_model_list[i]}_opt.nb" "${LOG_PATH}" "${img_dir}" "${config_dir}"
done
done
elif [[ $inference_cmd =~ "rec" ]]; then
IFS="|"
rec_model_list=(${rec_model_list[*]})
cls_model_list=(${cls_model_list[*]})
for i in {0..1}; do
#run lite inference
for img_dir in ${infer_img_dir_list[*]}; do
func_test_rec "${inference_cmd}" "${rec_model}_opt.nb" "${cls_model_list[i]}_opt.nb" "${LOG_PATH}" "${img_dir}" "${rec_dict_dir}" "${config_dir}"
done
done
elif [[ $inference_cmd =~ "system" ]]; then
IFS="|"
det_model_list=(${det_model_list[*]})
rec_model_list=(${rec_model_list[*]})
cls_model_list=(${cls_model_list[*]})
for i in {0..1}; do
#run lite inference
for img_dir in ${infer_img_dir_list[*]}; do
func_test_system "${inference_cmd}" "${det_model_list[i]}_opt.nb" "${rec_model_list[i]}_opt.nb" "${cls_model_list[i]}_opt.nb" "${LOG_PATH}" "${img_dir}" "${config_dir}" "${rec_dict_dir}"
done
done
fi
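The positional reads above (lines[1] through lines[12]) imply a fixed config layout. A hypothetical config making that mapping concrete; every value below is a placeholder, since the real config files are not part of this commit (note the first file line is lines[0] and is never read):

==== lite test params ====
inference_cmd:./ocr_db_crnn system
runtime_device:ARM_CPU
det_model_list:ch_ppocr_mobile_v2.0_det|ch_ppocr_mobile_v2.0_det_slim
rec_model_list:ch_ppocr_mobile_v2.0_rec|ch_ppocr_mobile_v2.0_rec_slim
cls_model_list:ch_ppocr_mobile_v2.0_cls|ch_ppocr_mobile_v2.0_cls_slim
cpu_threads_list:1|4
det_batch_size_list:1
rec_batch_size_list:1
infer_img_dir_list:./test_data/imgs
config_dir:./config.txt
rec_dict_dir:./ppocr_keys_v1.txt
benchmark_value:True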
#!/bin/bash
source test_tipc/common_func.sh
FILENAME=$1
dataline=$(cat ${FILENAME})
lines=(${dataline})
# common params
model_name=$(func_parser_value "${lines[1]}")
python=$(func_parser_value "${lines[2]}")
# parser params
dataline=$(awk 'NR==1, NR==12{print}' $FILENAME)
IFS=$'\n'
lines=(${dataline})
# parser paddle2onnx
model_name=$(func_parser_value "${lines[1]}")
python=$(func_parser_value "${lines[2]}")
paddle2onnx_cmd=$(func_parser_value "${lines[3]}")
infer_model_dir_key=$(func_parser_key "${lines[4]}")
infer_model_dir_value=$(func_parser_value "${lines[4]}")
model_filename_key=$(func_parser_key "${lines[5]}")
model_filename_value=$(func_parser_value "${lines[5]}")
params_filename_key=$(func_parser_key "${lines[6]}")
params_filename_value=$(func_parser_value "${lines[6]}")
save_file_key=$(func_parser_key "${lines[7]}")
save_file_value=$(func_parser_value "${lines[7]}")
opset_version_key=$(func_parser_key "${lines[8]}")
opset_version_value=$(func_parser_value "${lines[8]}")
enable_onnx_checker_key=$(func_parser_key "${lines[9]}")
enable_onnx_checker_value=$(func_parser_value "${lines[9]}")
# parser onnx inference
inference_py=$(func_parser_value "${lines[10]}")
use_gpu_key=$(func_parser_key "${lines[11]}")
use_gpu_value=$(func_parser_value "${lines[11]}")
det_model_key=$(func_parser_key "${lines[12]}")
image_dir_key=$(func_parser_key "${lines[13]}")
image_dir_value=$(func_parser_value "${lines[13]}")
LOG_PATH="./test_tipc/output"
mkdir -p ./test_tipc/output
status_log="${LOG_PATH}/results_paddle2onnx.log"
function func_paddle2onnx(){
    IFS='|'
    _script=$1

    # paddle2onnx
    _save_log_path="${LOG_PATH}/paddle2onnx_infer_cpu.log"
    set_dirname=$(func_set_params "${infer_model_dir_key}" "${infer_model_dir_value}")
    set_model_filename=$(func_set_params "${model_filename_key}" "${model_filename_value}")
    set_params_filename=$(func_set_params "${params_filename_key}" "${params_filename_value}")
    set_save_model=$(func_set_params "${save_file_key}" "${save_file_value}")
    set_opset_version=$(func_set_params "${opset_version_key}" "${opset_version_value}")
    set_enable_onnx_checker=$(func_set_params "${enable_onnx_checker_key}" "${enable_onnx_checker_value}")
    trans_model_cmd="${paddle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker}"
    eval $trans_model_cmd
    last_status=${PIPESTATUS[0]}
    status_check $last_status "${trans_model_cmd}" "${status_log}"

    # python inference
    set_gpu=$(func_set_params "${use_gpu_key}" "${use_gpu_value}")
    set_model_dir=$(func_set_params "${det_model_key}" "${save_file_value}")
    set_img_dir=$(func_set_params "${image_dir_key}" "${image_dir_value}")
    infer_model_cmd="${python} ${inference_py} ${set_gpu} ${set_img_dir} ${set_model_dir} --use_onnx=True > ${_save_log_path} 2>&1 "
    eval $infer_model_cmd
    last_status=${PIPESTATUS[0]}
    status_check $last_status "${infer_model_cmd}" "${status_log}"
}

echo "################### run test ###################"
export Count=0
IFS="|"
func_paddle2onnx
\ No newline at end of file
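For orientation, the paddle2onnx_cmd parsed from the config presumably expands to a paddle2onnx call along these lines; the directories and opset value are illustrative placeholders, not values taken from this commit:

paddle2onnx --model_dir ./inference/det_infer/ \
            --model_filename inference.pdmodel \
            --params_filename inference.pdiparams \
            --save_file ./inference/det_onnx/model.onnx \
            --opset_version 10 \
            --enable_onnx_checker True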
#!/bin/bash
source test_tipc/common_func.sh

FILENAME=$1
dataline=$(awk 'NR==1, NR==18{print}' $FILENAME)

# parser params
IFS=$'\n'
@@ -35,9 +35,11 @@ web_use_trt_list=$(func_parser_value "${lines[14]}")
web_precision_key=$(func_parser_key "${lines[15]}")
web_precision_list=$(func_parser_value "${lines[15]}")
pipeline_py=$(func_parser_value "${lines[16]}")
image_dir_key=$(func_parser_key "${lines[17]}")
image_dir_value=$(func_parser_value "${lines[17]}")

LOG_PATH="../../test_tipc/output"
mkdir -p ./test_tipc/output
status_log="${LOG_PATH}/results_serving.log"

function func_serving(){
@@ -51,67 +53,98 @@ function func_serving(){
    set_params_filename=$(func_set_params "${params_filename_key}" "${params_filename_value}")
    set_serving_server=$(func_set_params "${serving_server_key}" "${serving_server_value}")
    set_serving_client=$(func_set_params "${serving_client_key}" "${serving_client_value}")
    set_image_dir=$(func_set_params "${image_dir_key}" "${image_dir_value}")
    trans_model_cmd="${python} ${trans_model_py} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_serving_server} ${set_serving_client}"
    eval $trans_model_cmd
    cd ${serving_dir_value}
    echo $PWD
    unset https_proxy
    unset http_proxy
    for python in ${python[*]}; do
        if [ ${python} = "cpp" ]; then
            for use_gpu in ${web_use_gpu_list[*]}; do
                if [ ${use_gpu} = "null" ]; then
                    web_service_cpp_cmd="${python} -m paddle_serving_server.serve --model ppocr_det_mobile_2.0_serving/ ppocr_rec_mobile_2.0_serving/ --port 9293"
                    eval $web_service_cpp_cmd
                    sleep 2s
                    _save_log_path="${LOG_PATH}/server_infer_cpp_cpu_pipeline_usemkldnn_False_threads_4_batchsize_1.log"
                    pipeline_cmd="${python} ocr_cpp_client.py ppocr_det_mobile_2.0_client/ ppocr_rec_mobile_2.0_client/"
                    eval $pipeline_cmd
                    last_status=${PIPESTATUS[0]}
                    status_check $last_status "${pipeline_cmd}" "${status_log}"
                    sleep 2s
                    ps ux | grep -E 'web_service|pipeline' | awk '{print $2}' | xargs kill -s 9
                else
                    web_service_cpp_cmd="${python} -m paddle_serving_server.serve --model ppocr_det_mobile_2.0_serving/ ppocr_rec_mobile_2.0_serving/ --port 9293 --gpu_id=0"
                    eval $web_service_cpp_cmd
                    sleep 2s
                    _save_log_path="${LOG_PATH}/server_infer_cpp_gpu_pipeline_usemkldnn_False_threads_4_batchsize_1.log"
                    pipeline_cmd="${python} ocr_cpp_client.py ppocr_det_mobile_2.0_client/ ppocr_rec_mobile_2.0_client/"
                    eval $pipeline_cmd
                    last_status=${PIPESTATUS[0]}
                    status_check $last_status "${pipeline_cmd}" "${status_log}"
                    sleep 2s
                    ps ux | grep -E 'web_service|pipeline' | awk '{print $2}' | xargs kill -s 9
                fi
            done
        else
            # python serving
            for use_gpu in ${web_use_gpu_list[*]}; do
                echo ${use_gpu}
                if [ ${use_gpu} = "null" ]; then
                    for use_mkldnn in ${web_use_mkldnn_list[*]}; do
                        if [ ${use_mkldnn} = "False" ]; then
                            continue
                        fi
                        for threads in ${web_cpu_threads_list[*]}; do
                            set_cpu_threads=$(func_set_params "${web_cpu_threads_key}" "${threads}")
                            web_service_cmd="${python} ${web_service_py} ${web_use_gpu_key}=${use_gpu} ${web_use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} &"
                            eval $web_service_cmd
                            sleep 2s
                            for pipeline in ${pipeline_py[*]}; do
                                _save_log_path="${LOG_PATH}/server_infer_cpu_${pipeline%_client*}_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_1.log"
                                pipeline_cmd="${python} ${pipeline} ${set_image_dir} > ${_save_log_path} 2>&1 "
                                eval $pipeline_cmd
                                last_status=${PIPESTATUS[0]}
                                eval "cat ${_save_log_path}"
                                status_check $last_status "${pipeline_cmd}" "${status_log}"
                                sleep 2s
                            done
                            ps ux | grep -E 'web_service|pipeline' | awk '{print $2}' | xargs kill -s 9
                        done
                    done
                elif [ ${use_gpu} = "0" ]; then
                    for use_trt in ${web_use_trt_list[*]}; do
                        for precision in ${web_precision_list[*]}; do
                            if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then
                                continue
                            fi
                            if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
                                continue
                            fi
                            if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [[ ${_flag_quant} = "True" ]]; then
                                continue
                            fi
                            set_tensorrt=$(func_set_params "${web_use_trt_key}" "${use_trt}")
                            set_precision=$(func_set_params "${web_precision_key}" "${precision}")
                            web_service_cmd="${python} ${web_service_py} ${web_use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} & "
                            eval $web_service_cmd
                            sleep 2s
                            for pipeline in ${pipeline_py[*]}; do
                                _save_log_path="${LOG_PATH}/server_infer_gpu_${pipeline%_client*}_usetrt_${use_trt}_precision_${precision}_batchsize_1.log"
                                pipeline_cmd="${python} ${pipeline} ${set_image_dir} > ${_save_log_path} 2>&1"
                                eval $pipeline_cmd
                                last_status=${PIPESTATUS[0]}
                                eval "cat ${_save_log_path}"
                                status_check $last_status "${pipeline_cmd}" "${status_log}"
                                sleep 2s
                            done
                            ps ux | grep -E 'web_service|pipeline' | awk '{print $2}' | xargs kill -s 9
                        done
                    done
                else
                    echo "Currently only CPU and GPU are supported!"
                fi
            done
        fi
    done
}
...
#!/bin/bash
source test_tipc/common_func.sh

FILENAME=$1
# MODE be one of ['lite_train_lite_infer' 'lite_train_whole_infer' 'whole_train_whole_infer', 'whole_infer', 'klquant_whole_infer']
MODE=$2

dataline=$(awk 'NR==1, NR==51{print}' $FILENAME)
@@ -59,6 +59,7 @@ export_key1=$(func_parser_key "${lines[33]}")
export_value1=$(func_parser_value "${lines[33]}")
export_key2=$(func_parser_key "${lines[34]}")
export_value2=$(func_parser_value "${lines[34]}")
inference_dir=$(func_parser_value "${lines[35]}")

# parser inference model
infer_model_dir_list=$(func_parser_value "${lines[36]}")
@@ -88,38 +89,40 @@ infer_key1=$(func_parser_key "${lines[50]}")
infer_value1=$(func_parser_value "${lines[50]}")

# parser klquant_infer
if [ ${MODE} = "klquant_whole_infer" ]; then
    dataline=$(awk 'NR==1, NR==17{print}' $FILENAME)
    lines=(${dataline})
    model_name=$(func_parser_value "${lines[1]}")
    python=$(func_parser_value "${lines[2]}")
    # parser inference model
    infer_model_dir_list=$(func_parser_value "${lines[3]}")
    infer_export_list=$(func_parser_value "${lines[4]}")
    infer_is_quant=$(func_parser_value "${lines[5]}")
    # parser inference
    inference_py=$(func_parser_value "${lines[6]}")
    use_gpu_key=$(func_parser_key "${lines[7]}")
    use_gpu_list=$(func_parser_value "${lines[7]}")
    use_mkldnn_key=$(func_parser_key "${lines[8]}")
    use_mkldnn_list=$(func_parser_value "${lines[8]}")
    cpu_threads_key=$(func_parser_key "${lines[9]}")
    cpu_threads_list=$(func_parser_value "${lines[9]}")
    batch_size_key=$(func_parser_key "${lines[10]}")
    batch_size_list=$(func_parser_value "${lines[10]}")
    use_trt_key=$(func_parser_key "${lines[11]}")
    use_trt_list=$(func_parser_value "${lines[11]}")
    precision_key=$(func_parser_key "${lines[12]}")
    precision_list=$(func_parser_value "${lines[12]}")
    infer_model_key=$(func_parser_key "${lines[13]}")
    image_dir_key=$(func_parser_key "${lines[14]}")
    infer_img_dir=$(func_parser_value "${lines[14]}")
    save_log_key=$(func_parser_key "${lines[15]}")
    benchmark_key=$(func_parser_key "${lines[16]}")
    benchmark_value=$(func_parser_value "${lines[16]}")
    infer_key1=$(func_parser_key "${lines[17]}")
    infer_value1=$(func_parser_value "${lines[17]}")
fi

LOG_PATH="./test_tipc/output"
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results_python.log"
@@ -202,7 +205,7 @@ function func_inference(){
        done
    done
}

if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then
    GPUID=$3
    if [ ${#GPUID} -le 0 ];then
        env=" "
@@ -243,7 +246,7 @@ else
    export Count=0
    USE_GPU_KEY=(${train_use_gpu_value})
    for gpu in ${gpu_list[*]}; do
        train_use_gpu=${USE_GPU_KEY[Count]}
        Count=$(($Count + 1))
        ips=""
        if [ ${gpu} = "-1" ];then
@@ -301,11 +304,20 @@ else
                set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
                set_batchsize=$(func_set_params "${train_batch_key}" "${train_batch_value}")
                set_train_params1=$(func_set_params "${train_param_key1}" "${train_param_value1}")
                set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${train_use_gpu}")
                if [ ${#ips} -le 26 ];then
                    save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}"
                    nodes=1
                else
                    IFS=","
                    ips_array=(${ips})
                    IFS="|"
                    nodes=${#ips_array[@]}
                    save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}"
                fi

                # load pretrain from norm training if current trainer is pact or fpgm trainer
                if ([ ${trainer} = ${pact_key} ] || [ ${trainer} = ${fpgm_key} ]) && [ ${nodes} -le 1 ]; then
                    set_pretrain="${load_norm_train_model}"
                fi
@@ -315,7 +327,7 @@ else
                elif [ ${#ips} -le 26 ];then  # train with multi-gpu
                    cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
                else     # train with multi-machine
                    cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
                fi
                # run train
                eval "unset CUDA_VISIBLE_DEVICES"
@@ -324,7 +336,7 @@ else
                set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/${train_model_name}")
                # save norm trained models to set pretrain for pact training and fpgm training
                if [ ${trainer} = ${trainer_norm} ] && [ ${nodes} -le 1 ]; then
                    load_norm_train_model=${set_eval_pretrain}
                fi
                # run eval
@@ -347,7 +359,13 @@ else
                    # run inference
                    eval $env
                    save_infer_path="${save_log}"
                    if [ ${inference_dir} != "null" ] && [ ${inference_dir} != '##' ]; then
                        infer_model_dir="${save_infer_path}/${inference_dir}"
                    else
                        infer_model_dir=${save_infer_path}
                    fi
                    func_inference "${python}" "${inference_py}" "${infer_model_dir}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}"

                    eval "unset CUDA_VISIBLE_DEVICES"
                fi
            done  # done with:    for trainer in ${trainer_list[*]}; do
...
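The multi-machine branch above counts training nodes by re-splitting the comma-separated --ips list. A standalone illustration of that logic with example values:

ips="192.168.0.1,192.168.0.2"   # two hosts, comma-separated
IFS=","
ips_array=(${ips})
IFS="|"
nodes=${#ips_array[@]}          # -> 2
# nodes > 1 skips PACT/FPGM pretrain reuse and tags save_log with _nodes_${nodes}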
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
@@ -27,7 +27,7 @@ from ppocr.data import build_dataloader
from ppocr.modeling.architectures import build_model
from ppocr.postprocess import build_post_process
from ppocr.metrics import build_metric
from ppocr.utils.save_load import load_model
from ppocr.utils.utility import print_dict
import tools.program as program
@@ -60,7 +60,7 @@ def main():
    else:
        model_type = None

    best_model_dict = load_model(config, model)
    if len(best_model_dict):
        logger.info('metric in ckpt ***************')
        for k, v in best_model_dict.items():
...
@@ -27,7 +27,7 @@ sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
from ppocr.data import build_dataloader
from ppocr.modeling.architectures import build_model
from ppocr.postprocess import build_post_process
from ppocr.utils.save_load import load_model
from ppocr.utils.utility import print_dict
import tools.program as program
@@ -57,7 +57,7 @@ def main():
    model = build_model(config['Architecture'])

    best_model_dict = load_model(config, model)
    if len(best_model_dict):
        logger.info('metric in ckpt ***************')
        for k, v in best_model_dict.items():
...
@@ -26,7 +26,7 @@ from paddle.jit import to_static
from ppocr.modeling.architectures import build_model
from ppocr.postprocess import build_post_process
from ppocr.utils.save_load import load_model
from ppocr.utils.logging import get_logger
from tools.program import load_config, merge_config, ArgsParser
@@ -107,7 +107,7 @@ def main():
    else:  # base rec model
        config["Architecture"]["Head"]["out_channels"] = char_num
    model = build_model(config["Architecture"])
    load_model(config, model)
    model.eval()

    save_path = config["Global"]["save_inference_dir"]
...
@@ -47,6 +47,7 @@ class TextClassifier(object):
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.output_tensors, _ = \
            utility.create_predictor(args, 'cls', logger)
        self.use_onnx = args.use_onnx

    def resize_norm_img(self, img):
        imgC, imgH, imgW = self.cls_image_shape
@@ -100,10 +101,16 @@ class TextClassifier(object):
            norm_img_batch = np.concatenate(norm_img_batch)
            norm_img_batch = norm_img_batch.copy()
            if self.use_onnx:
                input_dict = {}
                input_dict[self.input_tensor.name] = norm_img_batch
                outputs = self.predictor.run(self.output_tensors, input_dict)
                prob_out = outputs[0]
            else:
                self.input_tensor.copy_from_cpu(norm_img_batch)
                self.predictor.run()
                prob_out = self.output_tensors[0].copy_to_cpu()
                self.predictor.try_shrink_memory()
            cls_result = self.postprocess_op(prob_out)
            elapse += time.time() - starttime
            for rno in range(len(cls_result)):
...
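Every predictor class in this commit gains the same use_onnx branch. Since create_predictor (changed later in this diff) returns an onnxruntime InferenceSession as the predictor, sess.get_inputs()[0] as input_tensor, and None for output_tensors, the branch is just the standard onnxruntime calling convention. A standalone sketch of that pattern, with a placeholder model path and input shape:

# Minimal onnxruntime equivalent of the use_onnx branch above.
# "model.onnx" and the batch shape are illustrative placeholders.
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("model.onnx")
input_tensor = sess.get_inputs()[0]     # returned as input_tensor by create_predictor
norm_img_batch = np.zeros((1, 3, 48, 192), dtype=np.float32)

input_dict = {input_tensor.name: norm_img_batch}
outputs = sess.run(None, input_dict)    # output_tensors is None -> fetch all outputs
prob_out = outputs[0]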
@@ -38,6 +38,7 @@ class TextDetector(object):
    def __init__(self, args):
        self.args = args
        self.det_algorithm = args.det_algorithm
        self.use_onnx = args.use_onnx
        pre_process_list = [{
            'DetResizeForTest': {
                'limit_side_len': args.det_limit_side_len,
@@ -100,7 +101,12 @@ class TextDetector(object):
        else:
            logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
            sys.exit(0)
        if self.use_onnx:
            pre_process_list[0] = {
                'DetResizeForTest': {
                    'image_shape': [640, 640]
                }
            }
        self.preprocess_op = create_operators(pre_process_list)
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor(
@@ -198,15 +204,19 @@ class TextDetector(object):
        if self.args.benchmark:
            self.autolog.times.stamp()
        if self.use_onnx:
            input_dict = {}
            input_dict[self.input_tensor.name] = img
            outputs = self.predictor.run(self.output_tensors, input_dict)
        else:
            self.input_tensor.copy_from_cpu(img)
            self.predictor.run()
            outputs = []
            for output_tensor in self.output_tensors:
                output = output_tensor.copy_to_cpu()
                outputs.append(output)
            if self.args.benchmark:
                self.autolog.times.stamp()

        preds = {}
        if self.det_algorithm == "EAST":
...
@@ -38,6 +38,7 @@ class TextE2E(object):
    def __init__(self, args):
        self.args = args
        self.e2e_algorithm = args.e2e_algorithm
        self.use_onnx = args.use_onnx
        pre_process_list = [{
            'E2EResizeForTest': {}
        }, {
@@ -67,7 +68,6 @@ class TextE2E(object):
            postprocess_params["character_dict_path"] = args.e2e_char_dict_path
            postprocess_params["valid_set"] = args.e2e_pgnet_valid_set
            postprocess_params["mode"] = args.e2e_pgnet_mode
        else:
            logger.info("unknown e2e_algorithm:{}".format(self.e2e_algorithm))
            sys.exit(0)
@@ -106,21 +106,31 @@ class TextE2E(object):
        img = img.copy()
        starttime = time.time()

        if self.use_onnx:
            input_dict = {}
            input_dict[self.input_tensor.name] = img
            outputs = self.predictor.run(self.output_tensors, input_dict)
            preds = {}
            preds['f_border'] = outputs[0]
            preds['f_char'] = outputs[1]
            preds['f_direction'] = outputs[2]
            preds['f_score'] = outputs[3]
        else:
            self.input_tensor.copy_from_cpu(img)
            self.predictor.run()
            outputs = []
            for output_tensor in self.output_tensors:
                output = output_tensor.copy_to_cpu()
                outputs.append(output)

            preds = {}
            if self.e2e_algorithm == 'PGNet':
                preds['f_border'] = outputs[0]
                preds['f_char'] = outputs[1]
                preds['f_direction'] = outputs[2]
                preds['f_score'] = outputs[3]
            else:
                raise NotImplementedError
        post_result = self.postprocess_op(preds, shape_list)
        points, strs = post_result['points'], post_result['texts']
        dt_boxes = self.filter_tag_det_res_only_clip(points, ori_im.shape)
...
@@ -73,6 +73,7 @@ class TextRecognizer(object):
        self.predictor, self.input_tensor, self.output_tensors, self.config = \
            utility.create_predictor(args, 'rec', logger)
        self.benchmark = args.benchmark
        self.use_onnx = args.use_onnx
        if args.benchmark:
            import auto_log
            pid = os.getpid()
@@ -106,8 +107,9 @@ class TextRecognizer(object):
            return norm_img.astype(np.float32) / 128. - 1.

        assert imgC == img.shape[2]
        imgW = int((32 * max_wh_ratio))
        if self.use_onnx:
            imgW = 100
        h, w = img.shape[:2]
        ratio = w / float(h)
        if math.ceil(imgH * ratio) > imgW:
@@ -297,51 +299,72 @@ class TextRecognizer(object):
                    gsrm_slf_attn_bias1_list,
                    gsrm_slf_attn_bias2_list,
                ]
                if self.use_onnx:
                    input_dict = {}
                    input_dict[self.input_tensor.name] = norm_img_batch
                    outputs = self.predictor.run(self.output_tensors,
                                                 input_dict)
                    preds = {"predict": outputs[2]}
                else:
                    input_names = self.predictor.get_input_names()
                    for i in range(len(input_names)):
                        input_tensor = self.predictor.get_input_handle(
                            input_names[i])
                        input_tensor.copy_from_cpu(inputs[i])
                    self.predictor.run()
                    outputs = []
                    for output_tensor in self.output_tensors:
                        output = output_tensor.copy_to_cpu()
                        outputs.append(output)
                    if self.benchmark:
                        self.autolog.times.stamp()
                    preds = {"predict": outputs[2]}
            elif self.rec_algorithm == "SAR":
                valid_ratios = np.concatenate(valid_ratios)
                inputs = [
                    norm_img_batch,
                    valid_ratios,
                ]
                if self.use_onnx:
                    input_dict = {}
                    input_dict[self.input_tensor.name] = norm_img_batch
                    outputs = self.predictor.run(self.output_tensors,
                                                 input_dict)
                    preds = outputs[0]
                else:
                    input_names = self.predictor.get_input_names()
                    for i in range(len(input_names)):
                        input_tensor = self.predictor.get_input_handle(
                            input_names[i])
                        input_tensor.copy_from_cpu(inputs[i])
                    self.predictor.run()
                    outputs = []
                    for output_tensor in self.output_tensors:
                        output = output_tensor.copy_to_cpu()
                        outputs.append(output)
                    if self.benchmark:
                        self.autolog.times.stamp()
                    preds = outputs[0]
            else:
                if self.use_onnx:
                    input_dict = {}
                    input_dict[self.input_tensor.name] = norm_img_batch
                    outputs = self.predictor.run(self.output_tensors,
                                                 input_dict)
                    preds = outputs[0]
                else:
                    self.input_tensor.copy_from_cpu(norm_img_batch)
                    self.predictor.run()
                    outputs = []
                    for output_tensor in self.output_tensors:
                        output = output_tensor.copy_to_cpu()
                        outputs.append(output)
                    if self.benchmark:
                        self.autolog.times.stamp()
                    if len(outputs) != 1:
                        preds = outputs
                    else:
                        preds = outputs[0]
            rec_result = self.postprocess_op(preds)
            for rno in range(len(rec_result)):
                rec_res[indices[beg_img_no + rno]] = rec_result[rno]
...
@@ -49,11 +49,19 @@ class TextSystem(object):
        if self.use_angle_cls:
            self.text_classifier = predict_cls.TextClassifier(args)

        self.args = args
        self.crop_image_res_index = 0

    def draw_crop_rec_res(self, output_dir, img_crop_list, rec_res):
        os.makedirs(output_dir, exist_ok=True)
        bbox_num = len(img_crop_list)
        for bno in range(bbox_num):
            cv2.imwrite(
                os.path.join(output_dir,
                             f"img_crop_{bno+self.crop_image_res_index}.jpg"),
                img_crop_list[bno])
            logger.debug(f"{bno}, {rec_res[bno]}")
        self.crop_image_res_index += bbox_num

    def __call__(self, img, cls=True):
        ori_im = img.copy()
@@ -80,7 +88,9 @@ class TextSystem(object):
        rec_res, elapse = self.text_recognizer(img_crop_list)
        logger.debug("rec_res num : {}, elapse : {}".format(
            len(rec_res), elapse))
        if self.args.save_crop_res:
            self.draw_crop_rec_res(self.args.crop_res_save_dir, img_crop_list,
                                   rec_res)
        filter_boxes, filter_rec_res = [], []
        for box, rec_result in zip(dt_boxes, rec_res):
            text, score = rec_result
@@ -135,17 +145,17 @@ def main(args):
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.debug("error in loading image:{}".format(image_file))
            continue
        starttime = time.time()
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - starttime
        total_time += elapse

        logger.debug(
            str(idx) + " Predict time of %s: %.3fs" % (image_file, elapse))
        for text, score in rec_res:
            logger.debug("{}, {:.3f}".format(text, score))

        if is_visualize:
            image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
@@ -160,19 +170,17 @@ def main(args):
                scores,
                drop_score=drop_score,
                font_path=font_path)
            draw_img_save_dir = args.draw_img_save_dir
            os.makedirs(draw_img_save_dir, exist_ok=True)
            if flag:
                image_file = image_file[:-3] + "png"
            cv2.imwrite(
                os.path.join(draw_img_save_dir, os.path.basename(image_file)),
                draw_img[:, :, ::-1])
            logger.debug("The visualized image saved in {}".format(
                os.path.join(draw_img_save_dir, os.path.basename(image_file))))

    logger.info("The predict total time is {}".format(time.time() - _st))
    if args.benchmark:
        text_sys.text_detector.autolog.report()
        text_sys.text_recognizer.autolog.report()
...
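An illustrative end-to-end invocation exercising the flags added in this commit; the script path (presumably tools/infer/predict_system.py) and the model/image directories are placeholders:

python3 tools/infer/predict_system.py \
    --image_dir ./doc/imgs/ \
    --det_model_dir ./inference/det_infer/ \
    --rec_model_dir ./inference/rec_infer/ \
    --draw_img_save_dir ./inference_results \
    --save_crop_res True \
    --crop_res_save_dir ./output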
@@ -17,7 +17,7 @@ import os
import sys
import cv2
import numpy as np
import paddle
from PIL import Image, ImageDraw, ImageFont
import math
from paddle import inference
@@ -96,7 +96,6 @@ def init_args():
    parser.add_argument(
        "--e2e_char_dict_path", type=str, default="./ppocr/utils/ic15_dict.txt")
    parser.add_argument("--e2e_pgnet_valid_set", type=str, default='totaltext')
    parser.add_argument("--e2e_pgnet_mode", type=str, default='fast')

    # params for text classifier
@@ -110,7 +109,13 @@ def init_args():
    parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
    parser.add_argument("--cpu_threads", type=int, default=10)
    parser.add_argument("--use_pdserving", type=str2bool, default=False)
    parser.add_argument("--warmup", type=str2bool, default=False)

    # params for output saving
    parser.add_argument(
        "--draw_img_save_dir", type=str, default="./inference_results")
    parser.add_argument("--save_crop_res", type=str2bool, default=False)
    parser.add_argument("--crop_res_save_dir", type=str, default="./output")

    # multi-process
    parser.add_argument("--use_mp", type=str2bool, default=False)
@@ -121,6 +126,7 @@ def init_args():
    parser.add_argument("--save_log_path", type=str, default="./log_output/")
    parser.add_argument("--show_log", type=str2bool, default=True)
    parser.add_argument("--use_onnx", type=str2bool, default=False)
    return parser
@@ -144,25 +150,14 @@ def create_predictor(args, mode, logger):
    if model_dir is None:
        logger.info("not find {} model file path {}".format(mode, model_dir))
        sys.exit(0)
    if args.use_onnx:
        import onnxruntime as ort
        model_file_path = model_dir
        if not os.path.exists(model_file_path):
            raise ValueError("not find model file path {}".format(
                model_file_path))
        sess = ort.InferenceSession(model_file_path)
        return sess, sess.get_inputs()[0], None, None

    if args.use_gpu:
        gpu_id = get_infer_gpuid()
@@ -250,47 +245,154 @@ def create_predictor(args, mode, logger):
            min_input_shape = {"x": [1, 3, 48, 10]}
            max_input_shape = {"x": [args.rec_batch_num, 3, 48, 2000]}
            opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]}
        model_file_path = model_dir + "/inference.pdmodel"
        params_file_path = model_dir + "/inference.pdiparams"
        if not os.path.exists(model_file_path):
            raise ValueError("not find model file path {}".format(
                model_file_path))
        if not os.path.exists(params_file_path):
            raise ValueError("not find params file path {}".format(
                params_file_path))

        config = inference.Config(model_file_path, params_file_path)

        if hasattr(args, 'precision'):
            if args.precision == "fp16" and args.use_tensorrt:
                precision = inference.PrecisionType.Half
            elif args.precision == "int8":
                precision = inference.PrecisionType.Int8
            else:
                precision = inference.PrecisionType.Float32
        else:
            precision = inference.PrecisionType.Float32

        if args.use_gpu:
            gpu_id = get_infer_gpuid()
            if gpu_id is None:
                logger.warning(
                    "GPU is not found in current device by nvidia-smi. Please check your device or ignore it if running on Jetson."
                )
            config.enable_use_gpu(args.gpu_mem, 0)
            if args.use_tensorrt:
                config.enable_tensorrt_engine(
                    precision_mode=precision,
                    max_batch_size=args.max_batch_size,
                    min_subgraph_size=args.min_subgraph_size)
                # skip the minimum trt subgraph
                if mode == "det":
                    min_input_shape = {
                        "x": [1, 3, 50, 50],
                        "conv2d_92.tmp_0": [1, 120, 20, 20],
                        "conv2d_91.tmp_0": [1, 24, 10, 10],
                        "conv2d_59.tmp_0": [1, 96, 20, 20],
                        "nearest_interp_v2_1.tmp_0": [1, 256, 10, 10],
                        "nearest_interp_v2_2.tmp_0": [1, 256, 20, 20],
                        "conv2d_124.tmp_0": [1, 256, 20, 20],
                        "nearest_interp_v2_3.tmp_0": [1, 64, 20, 20],
                        "nearest_interp_v2_4.tmp_0": [1, 64, 20, 20],
                        "nearest_interp_v2_5.tmp_0": [1, 64, 20, 20],
                        "elementwise_add_7": [1, 56, 2, 2],
                        "nearest_interp_v2_0.tmp_0": [1, 256, 2, 2]
                    }
                    max_input_shape = {
                        "x": [1, 3, 1280, 1280],
                        "conv2d_92.tmp_0": [1, 120, 400, 400],
                        "conv2d_91.tmp_0": [1, 24, 200, 200],
                        "conv2d_59.tmp_0": [1, 96, 400, 400],
                        "nearest_interp_v2_1.tmp_0": [1, 256, 200, 200],
                        "conv2d_124.tmp_0": [1, 256, 400, 400],
                        "nearest_interp_v2_2.tmp_0": [1, 256, 400, 400],
                        "nearest_interp_v2_3.tmp_0": [1, 64, 400, 400],
                        "nearest_interp_v2_4.tmp_0": [1, 64, 400, 400],
                        "nearest_interp_v2_5.tmp_0": [1, 64, 400, 400],
                        "elementwise_add_7": [1, 56, 400, 400],
                        "nearest_interp_v2_0.tmp_0": [1, 256, 400, 400]
                    }
                    opt_input_shape = {
                        "x": [1, 3, 640, 640],
                        "conv2d_92.tmp_0": [1, 120, 160, 160],
                        "conv2d_91.tmp_0": [1, 24, 80, 80],
                        "conv2d_59.tmp_0": [1, 96, 160, 160],
                        "nearest_interp_v2_1.tmp_0": [1, 256, 80, 80],
                        "nearest_interp_v2_2.tmp_0": [1, 256, 160, 160],
                        "conv2d_124.tmp_0": [1, 256, 160, 160],
                        "nearest_interp_v2_3.tmp_0": [1, 64, 160, 160],
                        "nearest_interp_v2_4.tmp_0": [1, 64, 160, 160],
                        "nearest_interp_v2_5.tmp_0": [1, 64, 160, 160],
                        "elementwise_add_7": [1, 56, 40, 40],
                        "nearest_interp_v2_0.tmp_0": [1, 256, 40, 40]
                    }
                    min_pact_shape = {
                        "nearest_interp_v2_26.tmp_0": [1, 256, 20, 20],
                        "nearest_interp_v2_27.tmp_0": [1, 64, 20, 20],
                        "nearest_interp_v2_28.tmp_0": [1, 64, 20, 20],
                        "nearest_interp_v2_29.tmp_0": [1, 64, 20, 20]
                    }
                    max_pact_shape = {
                        "nearest_interp_v2_26.tmp_0": [1, 256, 400, 400],
                        "nearest_interp_v2_27.tmp_0": [1, 64, 400, 400],
                        "nearest_interp_v2_28.tmp_0": [1, 64, 400, 400],
                        "nearest_interp_v2_29.tmp_0": [1, 64, 400, 400]
                    }
                    opt_pact_shape = {
                        "nearest_interp_v2_26.tmp_0": [1, 256, 160, 160],
                        "nearest_interp_v2_27.tmp_0": [1, 64, 160, 160],
                        "nearest_interp_v2_28.tmp_0": [1, 64, 160, 160],
                        "nearest_interp_v2_29.tmp_0": [1, 64, 160, 160]
                    }
                    min_input_shape.update(min_pact_shape)
                    max_input_shape.update(max_pact_shape)
                    opt_input_shape.update(opt_pact_shape)
                elif mode == "rec":
                    min_input_shape = {"x": [1, 3, 32, 10]}
                    max_input_shape = {"x": [args.rec_batch_num, 3, 32, 1024]}
                    opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]}
                elif mode == "cls":
                    min_input_shape = {"x": [1, 3, 48, 10]}
                    max_input_shape = {"x": [args.rec_batch_num, 3, 48, 1024]}
                    opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]}
                else:
                    min_input_shape = {"x": [1, 3, 10, 10]}
                    max_input_shape = {"x": [1, 3, 512, 512]}
                    opt_input_shape = {"x": [1, 3, 256, 256]}
                config.set_trt_dynamic_shape_info(min_input_shape,
                                                  max_input_shape,
                                                  opt_input_shape)
        else:
            config.disable_gpu()
            if hasattr(args, "cpu_threads"):
                config.set_cpu_math_library_num_threads(args.cpu_threads)
            else:
                # default cpu threads as 10
                config.set_cpu_math_library_num_threads(10)
            if args.enable_mkldnn:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
                if args.precision == "fp16":
                    config.enable_mkldnn_bfloat16()
        # enable memory optim
        config.enable_memory_optim()
        config.disable_glog_info()
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
        if mode == 'table':
            config.delete_pass("fc_fuse_pass")  # not supported for table
        config.switch_use_feed_fetch_ops(False)
        config.switch_ir_optim(True)

        # create predictor
        predictor = inference.create_predictor(config)
        input_names = predictor.get_input_names()
        for name in input_names:
            input_tensor = predictor.get_input_handle(name)
        output_names = predictor.get_output_names()
        output_tensors = []
        for output_name in output_names:
            output_tensor = predictor.get_output_handle(output_name)
            output_tensors.append(output_tensor)
        return predictor, input_tensor, output_tensors, config
def get_infer_gpuid():
@@ -593,5 +695,12 @@ def get_rotate_crop_image(img, points):
    return dst_img


def check_gpu(use_gpu):
    if use_gpu and not paddle.is_compiled_with_cuda():
        use_gpu = False
    return use_gpu


if __name__ == '__main__':
    pass
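Because create_predictor now returns two differently shaped tuples (an onnxruntime session with output_tensors=None, versus a Paddle Inference predictor with real output handles), every caller has to branch the way the predict_* classes above do. A hedged sketch of such a caller; `args`, `img`, and `logger` stand in for the objects the real scripts build:

# Sketch only: consuming both return shapes of create_predictor.
predictor, input_tensor, output_tensors, config = create_predictor(args, 'det', logger)
if args.use_onnx:
    # onnxruntime path: feed a dict keyed by input name, get numpy arrays back
    outputs = predictor.run(output_tensors, {input_tensor.name: img})
else:
    # Paddle Inference path: copy into input handles, run, copy outputs out
    input_tensor.copy_from_cpu(img)
    predictor.run()
    outputs = [t.copy_to_cpu() for t in output_tensors]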