Commit 6c7ff9c7 authored by LDOUBLEV's avatar LDOUBLEV
Browse files

fix conflict

parents ac91a9e1 9b8f587e
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from PIL import Image
import numpy as np
from tools.infer.utility import draw_ocr_box_txt, init_args as infer_args
def init_args():
    """Return the inference argument parser extended with table options.

    Starts from the parser produced by ``infer_args()`` and registers the
    output directory option plus the table-structure / layout options on it.
    """
    # (flag, keyword arguments) pairs, registered in this order.
    extra_options = (
        # params for output
        ("--output", {"type": str, "default": './output/table'}),
        # params for table structure
        ("--table_max_len", {"type": int, "default": 488}),
        ("--table_model_dir", {"type": str}),
        ("--table_char_type", {"type": str, "default": 'en'}),
        ("--table_char_dict_path",
         {"type": str,
          "default": "../ppocr/utils/dict/table_structure_dict.txt"}),
        ("--layout_path_model",
         {"type": str,
          "default": "lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config"}),
    )
    arg_parser = infer_args()
    for flag, options in extra_options:
        arg_parser.add_argument(flag, **options)
    return arg_parser
def parse_args():
    """Build the table argument parser and return the parsed arguments."""
    return init_args().parse_args()
def draw_structure_result(image, result, font_path):
    """Draw the OCR results of every non-table region onto ``image``.

    Args:
        image: a PIL image, or a numpy array that is converted to one.
        result: iterable of region dicts; each has a ``'type'`` key and a
            ``'res'`` key whose first item holds the boxes and whose second
            item holds the matching (text, score) recognition entries.
        font_path: forwarded to ``draw_ocr_box_txt`` as ``font_path``.

    Returns:
        Whatever ``draw_ocr_box_txt`` returns for the collected boxes.
    """
    canvas = Image.fromarray(image) if isinstance(image, np.ndarray) else image

    polygons = []
    texts = []
    confidences = []
    for region in result:
        # Regions typed 'Table' contribute nothing to the drawing.
        if region['type'] == 'Table':
            continue
        region_boxes = region['res'][0]
        region_recs = region['res'][1]
        for quad, recognition in zip(region_boxes, region_recs):
            polygons.append(np.array(quad).reshape(-1, 2))
            texts.append(recognition[0])
            confidences.append(recognition[1])

    # drop_score=0 is passed so that no entry is filtered by score.
    return draw_ocr_box_txt(
        canvas, polygons, texts, confidences, font_path=font_path,
        drop_score=0)
\ No newline at end of file
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
from setuptools import setup from setuptools import setup
from io import open from io import open
from paddleocr import VERSION
with open('requirements.txt', encoding="utf-8-sig") as f: with open('requirements.txt', encoding="utf-8-sig") as f:
requirements = f.readlines() requirements = f.readlines()
...@@ -32,7 +33,7 @@ setup( ...@@ -32,7 +33,7 @@ setup(
package_dir={'paddleocr': ''}, package_dir={'paddleocr': ''},
include_package_data=True, include_package_data=True,
entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]}, entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
version='2.0.6', version=VERSION,
install_requires=requirements, install_requires=requirements,
license='Apache License 2.0', license='Apache License 2.0',
description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices', description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',
......
===========================train_params===========================
model_name:ocr_det
python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
Global.auto_cast:null
Global.epoch_num:lite_train_infer=2|whole_train_infer=300
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_infer=2|whole_train_infer=4
Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
null:null
##
trainer:norm_train|pact_train
norm_train:tools/train.py -c configs/det/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
pact_train:deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c configs/det/det_mv3_db.yml -o
null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
norm_export:tools/export_model.py -c configs/det/det_mv3_db.yml -o
quant_export:deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o
fpgm_export:deploy/slim/prune/export_prune_model.py
distill_export:null
export1:null
export2:null
##
infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/
infer_export:null
infer_quant:False
inference:tools/infer/predict_det.py
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False|True
--precision:fp32|fp16|int8
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
--save_log_path:null
--benchmark:True
null:null
#!/bin/bash
# Path of the params file to parse (first positional argument).
FILENAME=$1
# MODE must be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer']
MODE=$2
# Read the whole params file into a single string.
dataline=$(cat ${FILENAME})
# parser params
# Split on newlines only, so every non-empty line of the file becomes one
# element of `lines` (runs of newlines collapse, blank lines get no index).
IFS=$'\n'
lines=(${dataline})
function func_parser_key(){
    # Print the key of a "key:value" params line, i.e. the first
    # ":"-separated field of $1 (empty output when $1 is empty).
    #
    # IFS and the temporaries are local so that a direct call (outside a
    # command substitution) does not clobber the caller's IFS, and `read`
    # is used for the split so glob characters in the line are kept as-is.
    local strs=$1
    local IFS=":"
    local -a array
    read -r -a array <<< "${strs}"
    echo "${array[0]}"
}
function func_parser_value(){
    # Print the value of a "key:value" params line, i.e. the second
    # ":"-separated field of $1 (empty output when there is none).
    # Any text after a second ":" is not part of the printed value.
    #
    # IFS and the temporaries are local so that a direct call (outside a
    # command substitution) does not clobber the caller's IFS, and `read`
    # is used for the split so glob characters in the line are kept as-is.
    local strs=$1
    local IFS=":"
    local -a array
    read -r -a array <<< "${strs}"
    echo "${array[1]}"
}
IFS=$'\n'
# The training params
model_name=$(func_parser_value "${lines[1]}")
trainer_list=$(func_parser_value "${lines[14]}")

# MODE must be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer'];
# any other value (including an empty one) selects the inference-only data.
MODE=$2

# Download locations shared by the branches below.
pretrain_url="https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams"
test_data_url="https://paddleocr.bj.bcebos.com/dygraph_v2.0/test"
model_url="https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch"

# Archives are unpacked with `tar -C` instead of `cd dir && tar ...; cd ../`
# so a failed `cd` can never leave the script in the wrong working directory.
# "${MODE}" is quoted so an empty or multi-word value falls through to the
# default branch without a `[` syntax error.
case "${MODE}" in
    lite_train_infer)
        # pretrain lite train data
        wget -nc -P ./pretrain_models/ "${pretrain_url}"
        rm -rf ./train_data/icdar2015
        wget -nc -P ./train_data/ "${test_data_url}/icdar2015_lite.tar"
        tar xf ./train_data/icdar2015_lite.tar -C ./train_data/
        # Link target is relative to ./train_data/, same as the original link.
        ln -s ./icdar2015_lite ./train_data/icdar2015
        ;;
    whole_train_infer)
        wget -nc -P ./pretrain_models/ "${pretrain_url}"
        rm -rf ./train_data/icdar2015
        wget -nc -P ./train_data/ "${test_data_url}/icdar2015.tar"
        tar xf ./train_data/icdar2015.tar -C ./train_data/
        ;;
    whole_infer)
        wget -nc -P ./pretrain_models/ "${pretrain_url}"
        rm -rf ./train_data/icdar2015
        wget -nc -P ./train_data/ "${test_data_url}/icdar2015_infer.tar"
        tar xf ./train_data/icdar2015_infer.tar -C ./train_data/
        ln -s ./icdar2015_infer ./train_data/icdar2015
        ;;
    *)
        rm -rf ./train_data/icdar2015
        if [[ ${model_name} = "ocr_det" ]]; then
            wget -nc -P ./inference "${test_data_url}/ch_det_data_50.tar"
            eval_model_name="ch_ppocr_mobile_v2.0_det_infer"
            wget -nc -P ./inference "${model_url}/ch_ppocr_mobile_v2.0_det_infer.tar"
            tar xf "./inference/${eval_model_name}.tar" -C ./inference \
                && tar xf ./inference/ch_det_data_50.tar -C ./inference
        else
            eval_model_name="ch_ppocr_mobile_v2.0_rec_train"
            wget -nc -P ./inference "${model_url}/ch_ppocr_mobile_v2.0_rec_train.tar"
            tar xf "./inference/${eval_model_name}.tar" -C ./inference
        fi
        ;;
esac
#!/bin/bash
# Path of the params file to parse (first positional argument).
FILENAME=$1
# MODE must be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer']
MODE=$2
# Read the whole params file into a single string.
dataline=$(cat ${FILENAME})
# parser params
# Split on newlines only, so every non-empty line of the file becomes one
# element of `lines` (runs of newlines collapse, blank lines get no index).
IFS=$'\n'
lines=(${dataline})
function func_parser_key(){
    # Print the key of a "key:value" params line, i.e. the first
    # ":"-separated field of $1 (empty output when $1 is empty).
    #
    # IFS and the temporaries are local so that a direct call (outside a
    # command substitution) does not clobber the caller's IFS, and `read`
    # is used for the split so glob characters in the line are kept as-is.
    local strs=$1
    local IFS=":"
    local -a array
    read -r -a array <<< "${strs}"
    echo "${array[0]}"
}
function func_parser_value(){
    # Print the value of a "key:value" params line, i.e. the second
    # ":"-separated field of $1 (empty output when there is none).
    # Any text after a second ":" is not part of the printed value.
    #
    # IFS and the temporaries are local so that a direct call (outside a
    # command substitution) does not clobber the caller's IFS, and `read`
    # is used for the split so glob characters in the line are kept as-is.
    local strs=$1
    local IFS=":"
    local -a array
    read -r -a array <<< "${strs}"
    echo "${array[1]}"
}
function func_set_params(){
    # Print the command-line fragment "key=value" for one option.
    #
    # $1 - option key; the literal "null" means "no such option".
    # $2 - option value; "null", a single space or an empty string all
    #      mean "not set".
    # Prints a single space when the option must be omitted, so callers can
    # always splice the result into a command string.
    #
    # The key is quoted inside `[` so an empty or multi-word key no longer
    # triggers a test syntax error.
    local key=$1
    local value=$2
    if [ "${key}" = "null" ]; then
        echo " "
    elif [[ ${value} = "null" ]] || [[ ${value} = " " ]] || [ ${#value} -le 0 ]; then
        echo " "
    else
        echo "${key}=${value}"
    fi
}
function func_parser_params(){
# Print the value selected by the global ${MODE} from a params line of the
# form "key:mode1=value1|mode2=value2".
# $1 - the params line.
# When no entry matches ${MODE} only an empty line is printed.
strs=$1
# Split "key:entries" on ":"; only the first two fields are used.
IFS=":"
array=(${strs})
key=${array[0]}
tmp=${array[1]}
# Entries are separated by "|".
IFS="|"
res=""
for _params in ${tmp[*]}; do
# Split one "mode=value" entry.
IFS="="
array=(${_params})
mode=${array[0]}
value=${array[1]}
# NOTE: the unquoted right-hand side is matched as a pattern by [[ ]].
if [[ ${mode} = ${MODE} ]]; then
IFS="|"
#echo $(func_set_params "${mode}" "${value}")
echo $value
break
fi
# Restore the entry separator before the loop continues.
IFS="|"
done
# res is never assigned a non-empty value, so this prints an empty line
# (dropped by callers that use command substitution).
echo ${res}
}
function status_check(){
    # Report the outcome of a command on stdout and append the same line to
    # a log file.
    # $1 - exit code of the command
    # $2 - the command line that was run (used in the message)
    # $3 - log file the message is appended to
    last_status=$1   # the exit code
    run_command=$2
    run_log=$3
    # Success path first; everything else is reported as a failure.
    if [ $last_status -eq 0 ]; then
        echo -e "\033[33m Run successfully with command - ${run_command}!  \033[0m" | tee -a ${run_log}
        return
    fi
    echo -e "\033[33m Run failed with command - ${run_command}!  \033[0m" | tee -a ${run_log}
}
# Every setting below is read from a fixed position in `lines`, so the
# indices must stay in sync with the layout of the params file.
IFS=$'\n'
# The training params
model_name=$(func_parser_value "${lines[1]}")
python=$(func_parser_value "${lines[2]}")
gpu_list=$(func_parser_value "${lines[3]}")
train_use_gpu_key=$(func_parser_key "${lines[4]}")
train_use_gpu_value=$(func_parser_value "${lines[4]}")
autocast_list=$(func_parser_value "${lines[5]}")
autocast_key=$(func_parser_key "${lines[5]}")
epoch_key=$(func_parser_key "${lines[6]}")
# Mode-dependent value: picked by ${MODE} from "mode=value|mode=value".
epoch_num=$(func_parser_params "${lines[6]}")
save_model_key=$(func_parser_key "${lines[7]}")
train_batch_key=$(func_parser_key "${lines[8]}")
# Mode-dependent value: picked by ${MODE} from "mode=value|mode=value".
train_batch_value=$(func_parser_params "${lines[8]}")
pretrain_model_key=$(func_parser_key "${lines[9]}")
pretrain_model_value=$(func_parser_value "${lines[9]}")
train_model_name=$(func_parser_value "${lines[10]}")
train_infer_img_dir=$(func_parser_value "${lines[11]}")
train_param_key1=$(func_parser_key "${lines[12]}")
train_param_value1=$(func_parser_value "${lines[12]}")
# Trainer selection and the training script of each trainer.
trainer_list=$(func_parser_value "${lines[14]}")
trainer_norm=$(func_parser_key "${lines[15]}")
norm_trainer=$(func_parser_value "${lines[15]}")
pact_key=$(func_parser_key "${lines[16]}")
pact_trainer=$(func_parser_value "${lines[16]}")
fpgm_key=$(func_parser_key "${lines[17]}")
fpgm_trainer=$(func_parser_value "${lines[17]}")
distill_key=$(func_parser_key "${lines[18]}")
distill_trainer=$(func_parser_value "${lines[18]}")
trainer_key1=$(func_parser_key "${lines[19]}")
trainer_value1=$(func_parser_value "${lines[19]}")
trainer_key2=$(func_parser_key "${lines[20]}")
trainer_value2=$(func_parser_value "${lines[20]}")
# The eval params
eval_py=$(func_parser_value "${lines[23]}")
eval_key1=$(func_parser_key "${lines[24]}")
eval_value1=$(func_parser_value "${lines[24]}")
# The export params: only the keys of lines 27 and 28 are used; their values
# are filled in at run time.
save_infer_key=$(func_parser_key "${lines[27]}")
export_weight=$(func_parser_key "${lines[28]}")
norm_export=$(func_parser_value "${lines[29]}")
pact_export=$(func_parser_value "${lines[30]}")
fpgm_export=$(func_parser_value "${lines[31]}")
distill_export=$(func_parser_value "${lines[32]}")
export_key1=$(func_parser_key "${lines[33]}")
export_value1=$(func_parser_value "${lines[33]}")
export_key2=$(func_parser_key "${lines[34]}")
export_value2=$(func_parser_value "${lines[34]}")
# parser inference model
infer_model_dir_list=$(func_parser_value "${lines[36]}")
infer_export_list=$(func_parser_value "${lines[37]}")
infer_is_quant=$(func_parser_value "${lines[38]}")
# parser inference
inference_py=$(func_parser_value "${lines[39]}")
use_gpu_key=$(func_parser_key "${lines[40]}")
use_gpu_list=$(func_parser_value "${lines[40]}")
use_mkldnn_key=$(func_parser_key "${lines[41]}")
use_mkldnn_list=$(func_parser_value "${lines[41]}")
cpu_threads_key=$(func_parser_key "${lines[42]}")
cpu_threads_list=$(func_parser_value "${lines[42]}")
batch_size_key=$(func_parser_key "${lines[43]}")
batch_size_list=$(func_parser_value "${lines[43]}")
use_trt_key=$(func_parser_key "${lines[44]}")
use_trt_list=$(func_parser_value "${lines[44]}")
precision_key=$(func_parser_key "${lines[45]}")
precision_list=$(func_parser_value "${lines[45]}")
infer_model_key=$(func_parser_key "${lines[46]}")
image_dir_key=$(func_parser_key "${lines[47]}")
infer_img_dir=$(func_parser_value "${lines[47]}")
save_log_key=$(func_parser_key "${lines[48]}")
benchmark_key=$(func_parser_key "${lines[49]}")
benchmark_value=$(func_parser_value "${lines[49]}")
infer_key1=$(func_parser_key "${lines[50]}")
infer_value1=$(func_parser_value "${lines[50]}")
# All run logs and the aggregated status log live under LOG_PATH.
LOG_PATH="./tests/output"
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results.log"
function func_inference(){
# Run the inference script over every configured hardware/option combination
# and record the outcome of each run through status_check.
# $1 - python interpreter
# $2 - inference script
# $3 - model directory passed with ${infer_model_key}
# $4 - directory that receives one log file per run
# $5 - image directory passed with ${image_dir_key}
# $6 - "True"/"False": whether the model is a quantized one
# The option lists (use_gpu_list, use_mkldnn_list, ...) are globals whose
# items are separated by "|", hence the IFS below.
IFS='|'
_python=$1
_script=$2
_model_dir=$3
_log_path=$4
_img_dir=$5
_flag_quant=$6
# inference
for use_gpu in ${use_gpu_list[*]}; do
if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then
# CPU runs: iterate over mkldnn x threads x batch size.
for use_mkldnn in ${use_mkldnn_list[*]}; do
# Quantized models are not run with mkldnn disabled.
if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then
continue
fi
for threads in ${cpu_threads_list[*]}; do
for batch_size in ${batch_size_list[*]}; do
_save_log_path="${_log_path}/infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_${batch_size}.log"
set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
set_cpu_threads=$(func_set_params "${cpu_threads_key}" "${threads}")
set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} > ${_save_log_path} 2>&1 "
eval $command
# Exit status of the eval above, captured before anything else runs.
last_status=${PIPESTATUS[0]}
# Echo the captured log so it also shows up in the console output.
eval "cat ${_save_log_path}"
status_check $last_status "${command}" "${status_log}"
done
done
done
elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then
# GPU runs: iterate over TensorRT x precision x batch size.
for use_trt in ${use_trt_list[*]}; do
for precision in ${precision_list[*]}; do
# int8 is skipped for non-quantized models.
if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then
continue
fi
# fp16 and int8 are only run with TensorRT enabled.
if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
continue
fi
# Quantized models are skipped when TensorRT is off or precision is int8.
# NOTE(review): together with the first check this means int8 is never run
# when the quant flag is "True" or "False" - confirm this is intended.
if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [ ${_flag_quant} = "True" ]; then
continue
fi
for batch_size in ${batch_size_list[*]}; do
_save_log_path="${_log_path}/infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
set_tensorrt=$(func_set_params "${use_trt_key}" "${use_trt}")
set_precision=$(func_set_params "${precision_key}" "${precision}")
set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} > ${_save_log_path} 2>&1 "
eval $command
# Exit status of the eval above, captured before anything else runs.
last_status=${PIPESTATUS[0]}
# Echo the captured log so it also shows up in the console output.
eval "cat ${_save_log_path}"
status_check $last_status "${command}" "${status_log}"
done
done
done
else
echo "Does not support hardware other than CPU and GPU Currently!"
fi
done
}
# Entry point: MODE "infer" only exports (optionally) and runs inference on
# the listed models; every other MODE runs train -> eval -> export -> inference
# for each GPU setting, autocast setting and trainer.
if [ ${MODE} = "infer" ]; then
# Optional third argument: GPU id(s) to expose through CUDA_VISIBLE_DEVICES.
GPUID=$3
if [ ${#GPUID} -le 0 ];then
env=" "
else
env="export CUDA_VISIBLE_DEVICES=${GPUID}"
fi
# set CUDA_VISIBLE_DEVICES
eval $env
export Count=0
# The model, export and quant lists are "|"-separated and are walked in
# parallel using Count as the shared index.
IFS="|"
infer_run_exports=(${infer_export_list})
infer_quant_flag=(${infer_is_quant})
for infer_model in ${infer_model_dir_list[*]}; do
# run export
if [ ${infer_run_exports[Count]} != "null" ];then
set_export_weight=$(func_set_params "${export_weight}" "${infer_model}")
set_save_infer_key=$(func_set_params "${save_infer_key}" "${infer_model}")
export_cmd="${python} ${norm_export} ${set_export_weight} ${set_save_infer_key}"
eval $export_cmd
status_export=$?
# NOTE(review): only a successful export is reported here; a failed export
# leaves no entry in the status log - confirm this is intended.
if [ ${status_export} = 0 ];then
status_check $status_export "${export_cmd}" "${status_log}"
fi
fi
#run inference
is_quant=${infer_quant_flag[Count]}
func_inference "${python}" "${inference_py}" "${infer_model}" "${LOG_PATH}" "${infer_img_dir}" ${is_quant}
Count=$(($Count + 1))
done
else
IFS="|"
export Count=0
# use_gpu values are matched to gpu_list entries by position.
USE_GPU_KEY=(${train_use_gpu_value})
for gpu in ${gpu_list[*]}; do
use_gpu=${USE_GPU_KEY[Count]}
Count=$(($Count + 1))
# The shape of the gpu entry decides the device setup:
#   "-1"                -> no CUDA_VISIBLE_DEVICES export
#   one character       -> single GPU id
#   up to 15 characters -> comma-separated GPU ids
#   longer              -> "ips;gpus" for multi-machine training
if [ ${gpu} = "-1" ];then
env=""
elif [ ${#gpu} -le 1 ];then
env="export CUDA_VISIBLE_DEVICES=${gpu}"
eval ${env}
elif [ ${#gpu} -le 15 ];then
# Only the first listed GPU is exported (env is applied before inference).
IFS=","
array=(${gpu})
env="export CUDA_VISIBLE_DEVICES=${array[0]}"
IFS="|"
else
IFS=";"
array=(${gpu})
ips=${array[0]}
gpu=${array[1]}
IFS="|"
env=" "
fi
for autocast in ${autocast_list[*]}; do
for trainer in ${trainer_list[*]}; do
# Pick the train/export scripts for this trainer; anything that matches no
# known key falls back to the normal trainer.
flag_quant=False
if [ ${trainer} = ${pact_key} ]; then
run_train=${pact_trainer}
run_export=${pact_export}
flag_quant=True
elif [ ${trainer} = "${fpgm_key}" ]; then
run_train=${fpgm_trainer}
run_export=${fpgm_export}
elif [ ${trainer} = "${distill_key}" ]; then
run_train=${distill_trainer}
run_export=${distill_export}
elif [ ${trainer} = ${trainer_key1} ]; then
run_train=${trainer_value1}
run_export=${export_value1}
elif [[ ${trainer} = ${trainer_key2} ]]; then
run_train=${trainer_value2}
run_export=${export_value2}
else
run_train=${norm_trainer}
run_export=${norm_export}
fi
# A trainer configured as "null" is skipped entirely.
if [ ${run_train} = "null" ]; then
continue
fi
set_autocast=$(func_set_params "${autocast_key}" "${autocast}")
set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
set_batchsize=$(func_set_params "${train_batch_key}" "${train_batch_value}")
set_train_params1=$(func_set_params "${train_param_key1}" "${train_param_value1}")
set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${use_gpu}")
# One output directory per trainer / gpu / autocast combination.
save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}"
# load pretrain from norm training if current trainer is pact or fpgm trainer
# (load_norm_train_model is only set after a normal training run, see below)
if [ ${trainer} = ${pact_key} ] || [ ${trainer} = ${fpgm_key} ]; then
set_pretrain="${load_norm_train_model}"
fi
set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
if [ ${#gpu} -le 2 ];then # train with cpu or single gpu
cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} "
elif [ ${#gpu} -le 15 ];then # train with multi-gpu
cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1}"
else # train with multi-machine
cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1}"
fi
# run train
# CUDA_VISIBLE_DEVICES is cleared for training and only re-applied (via env)
# right before inference.
eval "unset CUDA_VISIBLE_DEVICES"
eval $cmd
status_check $? "${cmd}" "${status_log}"
set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/${train_model_name}")
# save norm trained models to set pretrain for pact training and fpgm training
if [ ${trainer} = ${trainer_norm} ]; then
load_norm_train_model=${set_eval_pretrain}
fi
# run eval
if [ ${eval_py} != "null" ]; then
set_eval_params1=$(func_set_params "${eval_key1}" "${eval_value1}")
eval_cmd="${python} ${eval_py} ${set_eval_pretrain} ${set_use_gpu} ${set_eval_params1}"
eval $eval_cmd
status_check $? "${eval_cmd}" "${status_log}"
fi
# run export model
if [ ${run_export} != "null" ]; then
# run export model
save_infer_path="${save_log}"
set_export_weight=$(func_set_params "${export_weight}" "${save_log}/${train_model_name}")
set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_path}")
export_cmd="${python} ${run_export} ${set_export_weight} ${set_save_infer_key}"
eval $export_cmd
status_check $? "${export_cmd}" "${status_log}"
#run inference
eval $env
save_infer_path="${save_log}"
func_inference "${python}" "${inference_py}" "${save_infer_path}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}"
eval "unset CUDA_VISIBLE_DEVICES"
fi
done # done with: for trainer in ${trainer_list[*]}; do
done # done with: for autocast in ${autocast_list[*]}; do
done # done with: for gpu in ${gpu_list[*]}; do
fi # end if [ ${MODE} = "infer" ]; then
...@@ -27,7 +27,7 @@ from ppocr.data import build_dataloader ...@@ -27,7 +27,7 @@ from ppocr.data import build_dataloader
from ppocr.modeling.architectures import build_model from ppocr.modeling.architectures import build_model
from ppocr.postprocess import build_post_process from ppocr.postprocess import build_post_process
from ppocr.metrics import build_metric from ppocr.metrics import build_metric
from ppocr.utils.save_load import init_model from ppocr.utils.save_load import init_model, load_pretrained_params
from ppocr.utils.utility import print_dict from ppocr.utils.utility import print_dict
import tools.program as program import tools.program as program
...@@ -44,12 +44,23 @@ def main(): ...@@ -44,12 +44,23 @@ def main():
# build model # build model
# for rec algorithm # for rec algorithm
if hasattr(post_process_class, 'character'): if hasattr(post_process_class, 'character'):
config['Architecture']["Head"]['out_channels'] = len( char_num = len(getattr(post_process_class, 'character'))
getattr(post_process_class, 'character')) if config['Architecture']["algorithm"] in ["Distillation",
]: # distillation model
for key in config['Architecture']["Models"]:
config['Architecture']["Models"][key]["Head"][
'out_channels'] = char_num
else: # base rec model
config['Architecture']["Head"]['out_channels'] = char_num
model = build_model(config['Architecture']) model = build_model(config['Architecture'])
use_srn = config['Architecture']['algorithm'] == "SRN" use_srn = config['Architecture']['algorithm'] == "SRN"
if "model_type" in config['Architecture'].keys():
model_type = config['Architecture']['model_type']
else:
model_type = None
best_model_dict = init_model(config, model, logger) best_model_dict = init_model(config, model)
if len(best_model_dict): if len(best_model_dict):
logger.info('metric in ckpt ***************') logger.info('metric in ckpt ***************')
for k, v in best_model_dict.items(): for k, v in best_model_dict.items():
...@@ -60,7 +71,7 @@ def main(): ...@@ -60,7 +71,7 @@ def main():
# start eval # start eval
metric = program.eval(model, valid_dataloader, post_process_class, metric = program.eval(model, valid_dataloader, post_process_class,
eval_class, use_srn) eval_class, model_type, use_srn)
logger.info('metric eval ***************') logger.info('metric eval ***************')
for k, v in metric.items(): for k, v in metric.items():
logger.info('{}:{}'.format(k, v)) logger.info('{}:{}'.format(k, v))
......
...@@ -17,7 +17,7 @@ import sys ...@@ -17,7 +17,7 @@ import sys
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) sys.path.append(os.path.abspath(os.path.join(__dir__, "..")))
import argparse import argparse
...@@ -31,32 +31,12 @@ from ppocr.utils.logging import get_logger ...@@ -31,32 +31,12 @@ from ppocr.utils.logging import get_logger
from tools.program import load_config, merge_config, ArgsParser from tools.program import load_config, merge_config, ArgsParser
def main(): def export_single_model(model, arch_config, save_path, logger):
FLAGS = ArgsParser().parse_args() if arch_config["algorithm"] == "SRN":
config = load_config(FLAGS.config) max_text_length = arch_config["Head"]["max_text_length"]
merge_config(FLAGS.opt)
logger = get_logger()
# build post process
post_process_class = build_post_process(config['PostProcess'],
config['Global'])
# build model
# for rec algorithm
if hasattr(post_process_class, 'character'):
char_num = len(getattr(post_process_class, 'character'))
config['Architecture']["Head"]['out_channels'] = char_num
model = build_model(config['Architecture'])
init_model(config, model, logger)
model.eval()
save_path = '{}/inference'.format(config['Global']['save_inference_dir'])
if config['Architecture']['algorithm'] == "SRN":
max_text_length = config['Architecture']['Head']['max_text_length']
other_shape = [ other_shape = [
paddle.static.InputSpec( paddle.static.InputSpec(
shape=[None, 1, 64, 256], dtype='float32'), [ shape=[None, 1, 64, 256], dtype="float32"), [
paddle.static.InputSpec( paddle.static.InputSpec(
shape=[None, 256, 1], shape=[None, 256, 1],
dtype="int64"), paddle.static.InputSpec( dtype="int64"), paddle.static.InputSpec(
...@@ -71,24 +51,67 @@ def main(): ...@@ -71,24 +51,67 @@ def main():
model = to_static(model, input_spec=other_shape) model = to_static(model, input_spec=other_shape)
else: else:
infer_shape = [3, -1, -1] infer_shape = [3, -1, -1]
if config['Architecture']['model_type'] == "rec": if arch_config["model_type"] == "rec":
infer_shape = [3, 32, -1] # for rec model, H must be 32 infer_shape = [3, 32, -1] # for rec model, H must be 32
if 'Transform' in config['Architecture'] and config['Architecture'][ if "Transform" in arch_config and arch_config[
'Transform'] is not None and config['Architecture'][ "Transform"] is not None and arch_config["Transform"][
'Transform']['name'] == 'TPS': "name"] == "TPS":
logger.info( logger.info(
'When there is tps in the network, variable length input is not supported, and the input size needs to be the same as during training' "When there is tps in the network, variable length input is not supported, and the input size needs to be the same as during training"
) )
infer_shape[-1] = 100 infer_shape[-1] = 100
elif arch_config["model_type"] == "table":
infer_shape = [3, 488, 488]
model = to_static( model = to_static(
model, model,
input_spec=[ input_spec=[
paddle.static.InputSpec( paddle.static.InputSpec(
shape=[None] + infer_shape, dtype='float32') shape=[None] + infer_shape, dtype="float32")
]) ])
paddle.jit.save(model, save_path) paddle.jit.save(model, save_path)
logger.info('inference model is saved to {}'.format(save_path)) logger.info("inference model is saved to {}".format(save_path))
return
def main():
    """Export the configured model as an inference model.

    Loads the config named on the command line, builds and initialises the
    model, switches it to eval mode and saves it under
    ``Global.save_inference_dir``. A "Distillation" architecture is exported
    as one inference model per sub-model, each in a sub-directory named after
    the sub-model; any other architecture is exported once.
    """
    flags = ArgsParser().parse_args()
    config = load_config(flags.config)
    merge_config(flags.opt)
    logger = get_logger()

    # build post process
    post_process_class = build_post_process(config["PostProcess"],
                                            config["Global"])

    # build model
    # for rec algorithm: the head size follows the post-process character set
    if hasattr(post_process_class, "character"):
        char_num = len(post_process_class.character)
        if config["Architecture"]["algorithm"] in ["Distillation", ]:
            # distillation model: every sub-model head gets the same size
            for sub_config in config["Architecture"]["Models"].values():
                sub_config["Head"]["out_channels"] = char_num
        else:
            # base rec model
            config["Architecture"]["Head"]["out_channels"] = char_num

    model = build_model(config["Architecture"])
    init_model(config, model)
    model.eval()

    export_root = config["Global"]["save_inference_dir"]
    arch_config = config["Architecture"]

    if arch_config["algorithm"] not in ["Distillation", ]:
        export_single_model(model, arch_config,
                            os.path.join(export_root, "inference"), logger)
        return

    # distillation model: sub-model configs are paired with sub-models by index
    archs = list(arch_config["Models"].values())
    for idx, name in enumerate(model.model_name_list):
        export_single_model(model.model_list[idx], archs[idx],
                            os.path.join(export_root, name, "inference"),
                            logger)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -45,7 +45,7 @@ class TextClassifier(object): ...@@ -45,7 +45,7 @@ class TextClassifier(object):
"label_list": args.label_list, "label_list": args.label_list,
} }
self.postprocess_op = build_post_process(postprocess_params) self.postprocess_op = build_post_process(postprocess_params)
self.predictor, self.input_tensor, self.output_tensors = \ self.predictor, self.input_tensor, self.output_tensors, _ = \
utility.create_predictor(args, 'cls', logger) utility.create_predictor(args, 'cls', logger)
def resize_norm_img(self, img): def resize_norm_img(self, img):
...@@ -84,9 +84,11 @@ class TextClassifier(object): ...@@ -84,9 +84,11 @@ class TextClassifier(object):
batch_num = self.cls_batch_num batch_num = self.cls_batch_num
elapse = 0 elapse = 0
for beg_img_no in range(0, img_num, batch_num): for beg_img_no in range(0, img_num, batch_num):
end_img_no = min(img_num, beg_img_no + batch_num) end_img_no = min(img_num, beg_img_no + batch_num)
norm_img_batch = [] norm_img_batch = []
max_wh_ratio = 0 max_wh_ratio = 0
starttime = time.time()
for ino in range(beg_img_no, end_img_no): for ino in range(beg_img_no, end_img_no):
h, w = img_list[indices[ino]].shape[0:2] h, w = img_list[indices[ino]].shape[0:2]
wh_ratio = w * 1.0 / h wh_ratio = w * 1.0 / h
...@@ -97,7 +99,7 @@ class TextClassifier(object): ...@@ -97,7 +99,7 @@ class TextClassifier(object):
norm_img_batch.append(norm_img) norm_img_batch.append(norm_img)
norm_img_batch = np.concatenate(norm_img_batch) norm_img_batch = np.concatenate(norm_img_batch)
norm_img_batch = norm_img_batch.copy() norm_img_batch = norm_img_batch.copy()
starttime = time.time()
self.input_tensor.copy_from_cpu(norm_img_batch) self.input_tensor.copy_from_cpu(norm_img_batch)
self.predictor.run() self.predictor.run()
prob_out = self.output_tensors[0].copy_to_cpu() prob_out = self.output_tensors[0].copy_to_cpu()
...@@ -141,8 +143,8 @@ def main(args): ...@@ -141,8 +143,8 @@ def main(args):
for ino in range(len(img_list)): for ino in range(len(img_list)):
logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
cls_res[ino])) cls_res[ino]))
logger.info("Total predict time for {} images, cost: {:.3f}".format( logger.info(
len(img_list), predict_time)) "The predict time about text angle classify module is as follows: ")
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -41,7 +41,7 @@ class TextDetector(object): ...@@ -41,7 +41,7 @@ class TextDetector(object):
pre_process_list = [{ pre_process_list = [{
'DetResizeForTest': { 'DetResizeForTest': {
'limit_side_len': args.det_limit_side_len, 'limit_side_len': args.det_limit_side_len,
'limit_type': args.det_limit_type 'limit_type': args.det_limit_type,
} }
}, { }, {
'NormalizeImage': { 'NormalizeImage': {
...@@ -95,9 +95,27 @@ class TextDetector(object): ...@@ -95,9 +95,27 @@ class TextDetector(object):
self.preprocess_op = create_operators(pre_process_list) self.preprocess_op = create_operators(pre_process_list)
self.postprocess_op = build_post_process(postprocess_params) self.postprocess_op = build_post_process(postprocess_params)
self.predictor, self.input_tensor, self.output_tensors = utility.create_predictor( self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor(
args, 'det', logger) # paddle.jit.load(args.det_model_dir) args, 'det', logger)
# self.predictor.eval()
if args.benchmark:
import auto_log
pid = os.getpid()
self.autolog = auto_log.AutoLogger(
model_name="det",
model_precision=args.precision,
batch_size=1,
data_shape="dynamic",
save_path=None,
inference_config=self.config,
pids=pid,
process_name=None,
gpu_ids=0,
time_keys=[
'preprocess_time', 'inference_time', 'postprocess_time'
],
warmup=2,
logger=logger)
def order_points_clockwise(self, pts): def order_points_clockwise(self, pts):
""" """
...@@ -155,6 +173,12 @@ class TextDetector(object): ...@@ -155,6 +173,12 @@ class TextDetector(object):
def __call__(self, img): def __call__(self, img):
ori_im = img.copy() ori_im = img.copy()
data = {'image': img} data = {'image': img}
st = time.time()
if self.args.benchmark:
self.autolog.times.start()
data = transform(data, self.preprocess_op) data = transform(data, self.preprocess_op)
img, shape_list = data img, shape_list = data
if img is None: if img is None:
...@@ -162,7 +186,9 @@ class TextDetector(object): ...@@ -162,7 +186,9 @@ class TextDetector(object):
img = np.expand_dims(img, axis=0) img = np.expand_dims(img, axis=0)
shape_list = np.expand_dims(shape_list, axis=0) shape_list = np.expand_dims(shape_list, axis=0)
img = img.copy() img = img.copy()
starttime = time.time()
if self.args.benchmark:
self.autolog.times.stamp()
self.input_tensor.copy_from_cpu(img) self.input_tensor.copy_from_cpu(img)
self.predictor.run() self.predictor.run()
...@@ -170,6 +196,8 @@ class TextDetector(object): ...@@ -170,6 +196,8 @@ class TextDetector(object):
for output_tensor in self.output_tensors: for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu() output = output_tensor.copy_to_cpu()
outputs.append(output) outputs.append(output)
if self.args.benchmark:
self.autolog.times.stamp()
preds = {} preds = {}
if self.det_algorithm == "EAST": if self.det_algorithm == "EAST":
...@@ -184,15 +212,19 @@ class TextDetector(object): ...@@ -184,15 +212,19 @@ class TextDetector(object):
preds['maps'] = outputs[0] preds['maps'] = outputs[0]
else: else:
raise NotImplementedError raise NotImplementedError
self.predictor.try_shrink_memory()
#self.predictor.try_shrink_memory()
post_result = self.postprocess_op(preds, shape_list) post_result = self.postprocess_op(preds, shape_list)
dt_boxes = post_result[0]['points'] dt_boxes = post_result[0]['points']
if self.det_algorithm == "SAST" and self.det_sast_polygon: if self.det_algorithm == "SAST" and self.det_sast_polygon:
dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape) dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape)
else: else:
dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape) dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
elapse = time.time() - starttime
return dt_boxes, elapse if self.args.benchmark:
self.autolog.times.end(stamp=True)
et = time.time()
return dt_boxes, et - st
if __name__ == "__main__": if __name__ == "__main__":
...@@ -202,6 +234,12 @@ if __name__ == "__main__": ...@@ -202,6 +234,12 @@ if __name__ == "__main__":
count = 0 count = 0
total_time = 0 total_time = 0
draw_img_save = "./inference_results" draw_img_save = "./inference_results"
if args.warmup:
img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
for i in range(2):
res = text_detector(img)
if not os.path.exists(draw_img_save): if not os.path.exists(draw_img_save):
os.makedirs(draw_img_save) os.makedirs(draw_img_save)
for image_file in image_file_list: for image_file in image_file_list:
...@@ -211,10 +249,13 @@ if __name__ == "__main__": ...@@ -211,10 +249,13 @@ if __name__ == "__main__":
if img is None: if img is None:
logger.info("error in loading image:{}".format(image_file)) logger.info("error in loading image:{}".format(image_file))
continue continue
dt_boxes, elapse = text_detector(img) st = time.time()
dt_boxes, _ = text_detector(img)
elapse = time.time() - st
if count > 0: if count > 0:
total_time += elapse total_time += elapse
count += 1 count += 1
logger.info("Predict time of {}: {}".format(image_file, elapse)) logger.info("Predict time of {}: {}".format(image_file, elapse))
src_im = utility.draw_text_det_res(dt_boxes, image_file) src_im = utility.draw_text_det_res(dt_boxes, image_file)
img_name_pure = os.path.split(image_file)[-1] img_name_pure = os.path.split(image_file)[-1]
...@@ -222,5 +263,6 @@ if __name__ == "__main__": ...@@ -222,5 +263,6 @@ if __name__ == "__main__":
"det_res_{}".format(img_name_pure)) "det_res_{}".format(img_name_pure))
cv2.imwrite(img_path, src_im) cv2.imwrite(img_path, src_im)
logger.info("The visualized image saved in {}".format(img_path)) logger.info("The visualized image saved in {}".format(img_path))
if count > 1:
logger.info("Avg Time: {}".format(total_time / (count - 1))) if args.benchmark:
text_detector.autolog.report()
...@@ -41,7 +41,6 @@ class TextRecognizer(object): ...@@ -41,7 +41,6 @@ class TextRecognizer(object):
self.character_type = args.rec_char_type self.character_type = args.rec_char_type
self.rec_batch_num = args.rec_batch_num self.rec_batch_num = args.rec_batch_num
self.rec_algorithm = args.rec_algorithm self.rec_algorithm = args.rec_algorithm
self.max_text_length = args.max_text_length
postprocess_params = { postprocess_params = {
'name': 'CTCLabelDecode', 'name': 'CTCLabelDecode',
"character_type": args.rec_char_type, "character_type": args.rec_char_type,
...@@ -63,8 +62,27 @@ class TextRecognizer(object): ...@@ -63,8 +62,27 @@ class TextRecognizer(object):
"use_space_char": args.use_space_char "use_space_char": args.use_space_char
} }
self.postprocess_op = build_post_process(postprocess_params) self.postprocess_op = build_post_process(postprocess_params)
self.predictor, self.input_tensor, self.output_tensors = \ self.predictor, self.input_tensor, self.output_tensors, self.config = \
utility.create_predictor(args, 'rec', logger) utility.create_predictor(args, 'rec', logger)
self.benchmark = args.benchmark
if args.benchmark:
import auto_log
pid = os.getpid()
self.autolog = auto_log.AutoLogger(
model_name="rec",
model_precision=args.precision,
batch_size=args.rec_batch_num,
data_shape="dynamic",
save_path=None, #args.save_log_path,
inference_config=self.config,
pids=pid,
process_name=None,
gpu_ids=0 if args.use_gpu else None,
time_keys=[
'preprocess_time', 'inference_time', 'postprocess_time'
],
warmup=2,
logger=logger)
def resize_norm_img(self, img, max_wh_ratio): def resize_norm_img(self, img, max_wh_ratio):
imgC, imgH, imgW = self.rec_image_shape imgC, imgH, imgW = self.rec_image_shape
...@@ -166,17 +184,16 @@ class TextRecognizer(object): ...@@ -166,17 +184,16 @@ class TextRecognizer(object):
width_list.append(img.shape[1] / float(img.shape[0])) width_list.append(img.shape[1] / float(img.shape[0]))
# Sorting can speed up the recognition process # Sorting can speed up the recognition process
indices = np.argsort(np.array(width_list)) indices = np.argsort(np.array(width_list))
# rec_res = []
rec_res = [['', 0.0]] * img_num rec_res = [['', 0.0]] * img_num
batch_num = self.rec_batch_num batch_num = self.rec_batch_num
elapse = 0 st = time.time()
if self.benchmark:
self.autolog.times.start()
for beg_img_no in range(0, img_num, batch_num): for beg_img_no in range(0, img_num, batch_num):
end_img_no = min(img_num, beg_img_no + batch_num) end_img_no = min(img_num, beg_img_no + batch_num)
norm_img_batch = [] norm_img_batch = []
max_wh_ratio = 0 max_wh_ratio = 0
for ino in range(beg_img_no, end_img_no): for ino in range(beg_img_no, end_img_no):
# h, w = img_list[ino].shape[0:2]
h, w = img_list[indices[ino]].shape[0:2] h, w = img_list[indices[ino]].shape[0:2]
wh_ratio = w * 1.0 / h wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio) max_wh_ratio = max(max_wh_ratio, wh_ratio)
...@@ -187,9 +204,8 @@ class TextRecognizer(object): ...@@ -187,9 +204,8 @@ class TextRecognizer(object):
norm_img = norm_img[np.newaxis, :] norm_img = norm_img[np.newaxis, :]
norm_img_batch.append(norm_img) norm_img_batch.append(norm_img)
else: else:
norm_img = self.process_image_srn(img_list[indices[ino]], norm_img = self.process_image_srn(
self.rec_image_shape, 8, img_list[indices[ino]], self.rec_image_shape, 8, 25)
self.max_text_length)
encoder_word_pos_list = [] encoder_word_pos_list = []
gsrm_word_pos_list = [] gsrm_word_pos_list = []
gsrm_slf_attn_bias1_list = [] gsrm_slf_attn_bias1_list = []
...@@ -201,9 +217,10 @@ class TextRecognizer(object): ...@@ -201,9 +217,10 @@ class TextRecognizer(object):
norm_img_batch.append(norm_img[0]) norm_img_batch.append(norm_img[0])
norm_img_batch = np.concatenate(norm_img_batch) norm_img_batch = np.concatenate(norm_img_batch)
norm_img_batch = norm_img_batch.copy() norm_img_batch = norm_img_batch.copy()
if self.benchmark:
self.autolog.times.stamp()
if self.rec_algorithm == "SRN": if self.rec_algorithm == "SRN":
starttime = time.time()
encoder_word_pos_list = np.concatenate(encoder_word_pos_list) encoder_word_pos_list = np.concatenate(encoder_word_pos_list)
gsrm_word_pos_list = np.concatenate(gsrm_word_pos_list) gsrm_word_pos_list = np.concatenate(gsrm_word_pos_list)
gsrm_slf_attn_bias1_list = np.concatenate( gsrm_slf_attn_bias1_list = np.concatenate(
...@@ -228,9 +245,10 @@ class TextRecognizer(object): ...@@ -228,9 +245,10 @@ class TextRecognizer(object):
for output_tensor in self.output_tensors: for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu() output = output_tensor.copy_to_cpu()
outputs.append(output) outputs.append(output)
if self.benchmark:
self.autolog.times.stamp()
preds = {"predict": outputs[2]} preds = {"predict": outputs[2]}
else: else:
starttime = time.time()
self.input_tensor.copy_from_cpu(norm_img_batch) self.input_tensor.copy_from_cpu(norm_img_batch)
self.predictor.run() self.predictor.run()
...@@ -238,23 +256,30 @@ class TextRecognizer(object): ...@@ -238,23 +256,30 @@ class TextRecognizer(object):
for output_tensor in self.output_tensors: for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu() output = output_tensor.copy_to_cpu()
outputs.append(output) outputs.append(output)
if self.benchmark:
self.autolog.times.stamp()
preds = outputs[0] preds = outputs[0]
self.predictor.try_shrink_memory()
rec_result = self.postprocess_op(preds) rec_result = self.postprocess_op(preds)
for rno in range(len(rec_result)): for rno in range(len(rec_result)):
rec_res[indices[beg_img_no + rno]] = rec_result[rno] rec_res[indices[beg_img_no + rno]] = rec_result[rno]
elapse += time.time() - starttime if self.benchmark:
return rec_res, elapse self.autolog.times.end(stamp=True)
return rec_res, time.time() - st
def main(args): def main(args):
image_file_list = get_image_file_list(args.image_dir) image_file_list = get_image_file_list(args.image_dir)
text_recognizer = TextRecognizer(args) text_recognizer = TextRecognizer(args)
total_run_time = 0.0
total_images_num = 0
valid_image_file_list = [] valid_image_file_list = []
img_list = [] img_list = []
for idx, image_file in enumerate(image_file_list):
# warmup 2 times
if args.warmup:
img = np.random.uniform(0, 255, [32, 320, 3]).astype(np.uint8)
for i in range(2):
res = text_recognizer([img])
for image_file in image_file_list:
img, flag = check_and_read_gif(image_file) img, flag = check_and_read_gif(image_file)
if not flag: if not flag:
img = cv2.imread(image_file) img = cv2.imread(image_file)
...@@ -263,29 +288,18 @@ def main(args): ...@@ -263,29 +288,18 @@ def main(args):
continue continue
valid_image_file_list.append(image_file) valid_image_file_list.append(image_file)
img_list.append(img) img_list.append(img)
if len(img_list) >= args.rec_batch_num or idx == len( try:
image_file_list) - 1: rec_res, _ = text_recognizer(img_list)
try:
rec_res, predict_time = text_recognizer(img_list) except Exception as E:
total_run_time += predict_time logger.info(traceback.format_exc())
except: logger.info(E)
logger.info(traceback.format_exc()) exit()
logger.info( for ino in range(len(img_list)):
"ERROR!!!! \n" logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
"Please read the FAQ:https://github.com/PaddlePaddle/PaddleOCR#faq \n" rec_res[ino]))
"If your model has tps module: " if args.benchmark:
"TPS does not support variable shape.\n" text_recognizer.autolog.report()
"Please set --rec_image_shape='3,32,100' and --rec_char_type='en' "
)
exit()
for ino in range(len(img_list)):
logger.info("Predicts of {}:{}".format(valid_image_file_list[
ino], rec_res[ino]))
total_images_num += len(valid_image_file_list)
valid_image_file_list = []
img_list = []
logger.info("Total predict time for {} images, cost: {:.3f}".format(
total_images_num, total_run_time))
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -25,6 +25,7 @@ import cv2 ...@@ -25,6 +25,7 @@ import cv2
import copy import copy
import numpy as np import numpy as np
import time import time
import logging
from PIL import Image from PIL import Image
import tools.infer.utility as utility import tools.infer.utility as utility
import tools.infer.predict_rec as predict_rec import tools.infer.predict_rec as predict_rec
...@@ -32,13 +33,15 @@ import tools.infer.predict_det as predict_det ...@@ -32,13 +33,15 @@ import tools.infer.predict_det as predict_det
import tools.infer.predict_cls as predict_cls import tools.infer.predict_cls as predict_cls
from ppocr.utils.utility import get_image_file_list, check_and_read_gif from ppocr.utils.utility import get_image_file_list, check_and_read_gif
from ppocr.utils.logging import get_logger from ppocr.utils.logging import get_logger
from tools.infer.utility import draw_ocr_box_txt from tools.infer.utility import draw_ocr_box_txt, get_rotate_crop_image
logger = get_logger() logger = get_logger()
class TextSystem(object): class TextSystem(object):
def __init__(self, args): def __init__(self, args):
if not args.show_log:
logger.setLevel(logging.INFO)
self.text_detector = predict_det.TextDetector(args) self.text_detector = predict_det.TextDetector(args)
self.text_recognizer = predict_rec.TextRecognizer(args) self.text_recognizer = predict_rec.TextRecognizer(args)
self.use_angle_cls = args.use_angle_cls self.use_angle_cls = args.use_angle_cls
...@@ -46,49 +49,17 @@ class TextSystem(object): ...@@ -46,49 +49,17 @@ class TextSystem(object):
if self.use_angle_cls: if self.use_angle_cls:
self.text_classifier = predict_cls.TextClassifier(args) self.text_classifier = predict_cls.TextClassifier(args)
def get_rotate_crop_image(self, img, points):
    """Cut the quadrilateral ``points`` out of ``img`` and rectify it.

    The four corners are warped onto an upright rectangle whose width is
    the longer of edges 0-1 / 2-3 and whose height is the longer of edges
    0-3 / 1-2. If the rectified crop is at least 1.5 times taller than it
    is wide, it is rotated with ``np.rot90`` before being returned.

    Args:
        img: image array handed to ``cv2.warpPerspective``.
        points: four (x, y) corners; corner ``i`` is mapped to the i-th
            corner of ``[[0, 0], [w, 0], [w, h], [0, h]]``.

    Returns:
        The rectified (and possibly rotated) crop.
    """
    # Lengths of the two pairs of opposite edges of the quadrilateral.
    edge_01 = np.linalg.norm(points[0] - points[1])
    edge_23 = np.linalg.norm(points[2] - points[3])
    edge_03 = np.linalg.norm(points[0] - points[3])
    edge_12 = np.linalg.norm(points[1] - points[2])
    crop_w = int(max(edge_01, edge_23))
    crop_h = int(max(edge_03, edge_12))

    # Destination rectangle corners, in the same order as ``points``.
    target = np.float32([[0, 0], [crop_w, 0], [crop_w, crop_h], [0, crop_h]])
    warp = cv2.getPerspectiveTransform(points, target)
    rectified = cv2.warpPerspective(
        img,
        warp, (crop_w, crop_h),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC)

    out_h, out_w = rectified.shape[0:2]
    if out_h * 1.0 / out_w >= 1.5:
        return np.rot90(rectified)
    return rectified
def print_draw_crop_rec_res(self, img_crop_list, rec_res):
    """Debug helper: save every cropped image and log its recognition result.

    Args:
        img_crop_list: cropped images; item ``i`` is written to
            ``./output/img_crop_<i>.jpg``.
        rec_res: recognition results indexed in parallel with
            ``img_crop_list``.
    """
    for bno, img_crop in enumerate(img_crop_list):
        cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop)
        # The first argument of logger.info is a %-format string. Passing
        # the integer index there with the result as an extra argument made
        # logging fail to format the record, so give it a real format.
        logger.info("%s, %s", bno, rec_res[bno])
def __call__(self, img): def __call__(self, img, cls=True):
ori_im = img.copy() ori_im = img.copy()
dt_boxes, elapse = self.text_detector(img) dt_boxes, elapse = self.text_detector(img)
logger.info("dt_boxes num : {}, elapse : {}".format(
logger.debug("dt_boxes num : {}, elapse : {}".format(
len(dt_boxes), elapse)) len(dt_boxes), elapse))
if dt_boxes is None: if dt_boxes is None:
return None, None return None, None
...@@ -98,16 +69,16 @@ class TextSystem(object): ...@@ -98,16 +69,16 @@ class TextSystem(object):
for bno in range(len(dt_boxes)): for bno in range(len(dt_boxes)):
tmp_box = copy.deepcopy(dt_boxes[bno]) tmp_box = copy.deepcopy(dt_boxes[bno])
img_crop = self.get_rotate_crop_image(ori_im, tmp_box) img_crop = get_rotate_crop_image(ori_im, tmp_box)
img_crop_list.append(img_crop) img_crop_list.append(img_crop)
if self.use_angle_cls: if self.use_angle_cls and cls:
img_crop_list, angle_list, elapse = self.text_classifier( img_crop_list, angle_list, elapse = self.text_classifier(
img_crop_list) img_crop_list)
logger.info("cls num : {}, elapse : {}".format( logger.debug("cls num : {}, elapse : {}".format(
len(img_crop_list), elapse)) len(img_crop_list), elapse))
rec_res, elapse = self.text_recognizer(img_crop_list) rec_res, elapse = self.text_recognizer(img_crop_list)
logger.info("rec_res num : {}, elapse : {}".format( logger.debug("rec_res num : {}, elapse : {}".format(
len(rec_res), elapse)) len(rec_res), elapse))
# self.print_draw_crop_rec_res(img_crop_list, rec_res) # self.print_draw_crop_rec_res(img_crop_list, rec_res)
filter_boxes, filter_rec_res = [], [] filter_boxes, filter_rec_res = [], []
...@@ -147,7 +118,19 @@ def main(args): ...@@ -147,7 +118,19 @@ def main(args):
is_visualize = True is_visualize = True
font_path = args.vis_font_path font_path = args.vis_font_path
drop_score = args.drop_score drop_score = args.drop_score
for image_file in image_file_list:
# warm up 10 times
if args.warmup:
img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
for i in range(10):
res = text_sys(img)
total_time = 0
cpu_mem, gpu_mem, gpu_util = 0, 0, 0
_st = time.time()
count = 0
for idx, image_file in enumerate(image_file_list):
img, flag = check_and_read_gif(image_file) img, flag = check_and_read_gif(image_file)
if not flag: if not flag:
img = cv2.imread(image_file) img = cv2.imread(image_file)
...@@ -157,8 +140,10 @@ def main(args): ...@@ -157,8 +140,10 @@ def main(args):
starttime = time.time() starttime = time.time()
dt_boxes, rec_res = text_sys(img) dt_boxes, rec_res = text_sys(img)
elapse = time.time() - starttime elapse = time.time() - starttime
logger.info("Predict time of %s: %.3fs" % (image_file, elapse)) total_time += elapse
logger.info(
str(idx) + " Predict time of %s: %.3fs" % (image_file, elapse))
for text, score in rec_res: for text, score in rec_res:
logger.info("{}, {:.3f}".format(text, score)) logger.info("{}, {:.3f}".format(text, score))
...@@ -178,12 +163,17 @@ def main(args): ...@@ -178,12 +163,17 @@ def main(args):
draw_img_save = "./inference_results/" draw_img_save = "./inference_results/"
if not os.path.exists(draw_img_save): if not os.path.exists(draw_img_save):
os.makedirs(draw_img_save) os.makedirs(draw_img_save)
if flag:
image_file = image_file[:-3] + "png"
cv2.imwrite( cv2.imwrite(
os.path.join(draw_img_save, os.path.basename(image_file)), os.path.join(draw_img_save, os.path.basename(image_file)),
draw_img[:, :, ::-1]) draw_img[:, :, ::-1])
logger.info("The visualized image saved in {}".format( logger.info("The visualized image saved in {}".format(
os.path.join(draw_img_save, os.path.basename(image_file)))) os.path.join(draw_img_save, os.path.basename(image_file))))
logger.info("The predict total time is {}".format(time.time() - _st))
logger.info("\nThe predict total time is {}".format(total_time))
if __name__ == "__main__": if __name__ == "__main__":
args = utility.parse_args() args = utility.parse_args()
......
...@@ -23,19 +23,20 @@ import math ...@@ -23,19 +23,20 @@ import math
from paddle import inference from paddle import inference
import time import time
from ppocr.utils.logging import get_logger from ppocr.utils.logging import get_logger
logger = get_logger()
def parse_args(): def str2bool(v):
def str2bool(v): return v.lower() in ("true", "t", "1")
return v.lower() in ("true", "t", "1")
def init_args():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
# params for prediction engine # params for prediction engine
parser.add_argument("--use_gpu", type=str2bool, default=True) parser.add_argument("--use_gpu", type=str2bool, default=True)
parser.add_argument("--ir_optim", type=str2bool, default=True) parser.add_argument("--ir_optim", type=str2bool, default=True)
parser.add_argument("--use_tensorrt", type=str2bool, default=False) parser.add_argument("--use_tensorrt", type=str2bool, default=False)
parser.add_argument("--use_fp16", type=str2bool, default=False) parser.add_argument("--min_subgraph_size", type=int, default=10)
parser.add_argument("--precision", type=str, default="fp32")
parser.add_argument("--gpu_mem", type=int, default=500) parser.add_argument("--gpu_mem", type=int, default=500)
# params for text detector # params for text detector
...@@ -47,8 +48,8 @@ def parse_args(): ...@@ -47,8 +48,8 @@ def parse_args():
# DB params # DB params
parser.add_argument("--det_db_thresh", type=float, default=0.3) parser.add_argument("--det_db_thresh", type=float, default=0.3)
parser.add_argument("--det_db_box_thresh", type=float, default=0.5) parser.add_argument("--det_db_box_thresh", type=float, default=0.6)
parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6) parser.add_argument("--det_db_unclip_ratio", type=float, default=1.5)
parser.add_argument("--max_batch_size", type=int, default=10) parser.add_argument("--max_batch_size", type=int, default=10)
parser.add_argument("--use_dilation", type=bool, default=False) parser.add_argument("--use_dilation", type=bool, default=False)
parser.add_argument("--det_db_score_mode", type=str, default="fast") parser.add_argument("--det_db_score_mode", type=str, default="fast")
...@@ -103,11 +104,22 @@ def parse_args(): ...@@ -103,11 +104,22 @@ def parse_args():
parser.add_argument("--enable_mkldnn", type=str2bool, default=False) parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
parser.add_argument("--cpu_threads", type=int, default=10) parser.add_argument("--cpu_threads", type=int, default=10)
parser.add_argument("--use_pdserving", type=str2bool, default=False) parser.add_argument("--use_pdserving", type=str2bool, default=False)
parser.add_argument("--warmup", type=str2bool, default=True)
# multi-process
parser.add_argument("--use_mp", type=str2bool, default=False) parser.add_argument("--use_mp", type=str2bool, default=False)
parser.add_argument("--total_process_num", type=int, default=1) parser.add_argument("--total_process_num", type=int, default=1)
parser.add_argument("--process_id", type=int, default=0) parser.add_argument("--process_id", type=int, default=0)
parser.add_argument("--benchmark", type=bool, default=False)
parser.add_argument("--save_log_path", type=str, default="./log_output/")
parser.add_argument("--show_log", type=str2bool, default=True)
return parser
def parse_args():
    """Build the parser with ``init_args`` and return its parsed arguments."""
    return init_args().parse_args()
...@@ -118,6 +130,8 @@ def create_predictor(args, mode, logger): ...@@ -118,6 +130,8 @@ def create_predictor(args, mode, logger):
model_dir = args.cls_model_dir model_dir = args.cls_model_dir
elif mode == 'rec': elif mode == 'rec':
model_dir = args.rec_model_dir model_dir = args.rec_model_dir
elif mode == 'table':
model_dir = args.table_model_dir
else: else:
model_dir = args.e2e_model_dir model_dir = args.e2e_model_dir
...@@ -127,83 +141,95 @@ def create_predictor(args, mode, logger): ...@@ -127,83 +141,95 @@ def create_predictor(args, mode, logger):
model_file_path = model_dir + "/inference.pdmodel" model_file_path = model_dir + "/inference.pdmodel"
params_file_path = model_dir + "/inference.pdiparams" params_file_path = model_dir + "/inference.pdiparams"
if not os.path.exists(model_file_path): if not os.path.exists(model_file_path):
logger.info("not find model file path {}".format(model_file_path)) raise ValueError("not find model file path {}".format(model_file_path))
sys.exit(0)
if not os.path.exists(params_file_path): if not os.path.exists(params_file_path):
logger.info("not find params file path {}".format(params_file_path)) raise ValueError("not find params file path {}".format(
sys.exit(0) params_file_path))
config = inference.Config(model_file_path, params_file_path) config = inference.Config(model_file_path, params_file_path)
if hasattr(args, 'precision'):
if args.precision == "fp16" and args.use_tensorrt:
precision = inference.PrecisionType.Half
elif args.precision == "int8":
precision = inference.PrecisionType.Int8
else:
precision = inference.PrecisionType.Float32
else:
precision = inference.PrecisionType.Float32
if args.use_gpu: if args.use_gpu:
config.enable_use_gpu(args.gpu_mem, 0) config.enable_use_gpu(args.gpu_mem, 0)
if args.use_tensorrt: if args.use_tensorrt:
config.enable_tensorrt_engine( config.enable_tensorrt_engine(
precision_mode=inference.PrecisionType.Float32, precision_mode=precision,
max_batch_size=args.max_batch_size, max_batch_size=args.max_batch_size,
min_subgraph_size=3) # skip the minmum trt subgraph min_subgraph_size=args.min_subgraph_size)
if mode == "det" and "mobile" in model_file_path: # skip the minmum trt subgraph
if mode == "det":
min_input_shape = { min_input_shape = {
"x": [1, 3, 50, 50], "x": [1, 3, 50, 50],
"conv2d_92.tmp_0": [1, 96, 20, 20], "conv2d_92.tmp_0": [1, 120, 20, 20],
"conv2d_91.tmp_0": [1, 96, 10, 10], "conv2d_91.tmp_0": [1, 24, 10, 10],
"nearest_interp_v2_1.tmp_0": [1, 96, 10, 10], "conv2d_59.tmp_0": [1, 96, 20, 20],
"nearest_interp_v2_2.tmp_0": [1, 96, 20, 20], "nearest_interp_v2_1.tmp_0": [1, 256, 10, 10],
"nearest_interp_v2_3.tmp_0": [1, 24, 20, 20], "nearest_interp_v2_2.tmp_0": [1, 256, 20, 20],
"nearest_interp_v2_4.tmp_0": [1, 24, 20, 20], "conv2d_124.tmp_0": [1, 256, 20, 20],
"nearest_interp_v2_5.tmp_0": [1, 24, 20, 20], "nearest_interp_v2_3.tmp_0": [1, 64, 20, 20],
"nearest_interp_v2_4.tmp_0": [1, 64, 20, 20],
"nearest_interp_v2_5.tmp_0": [1, 64, 20, 20],
"elementwise_add_7": [1, 56, 2, 2], "elementwise_add_7": [1, 56, 2, 2],
"nearest_interp_v2_0.tmp_0": [1, 96, 2, 2] "nearest_interp_v2_0.tmp_0": [1, 256, 2, 2]
} }
max_input_shape = { max_input_shape = {
"x": [1, 3, 2000, 2000], "x": [1, 3, 2000, 2000],
"conv2d_92.tmp_0": [1, 96, 400, 400], "conv2d_92.tmp_0": [1, 120, 400, 400],
"conv2d_91.tmp_0": [1, 96, 200, 200], "conv2d_91.tmp_0": [1, 24, 200, 200],
"nearest_interp_v2_1.tmp_0": [1, 96, 200, 200], "conv2d_59.tmp_0": [1, 96, 400, 400],
"nearest_interp_v2_2.tmp_0": [1, 96, 400, 400], "nearest_interp_v2_1.tmp_0": [1, 256, 200, 200],
"nearest_interp_v2_3.tmp_0": [1, 24, 400, 400], "conv2d_124.tmp_0": [1, 256, 400, 400],
"nearest_interp_v2_4.tmp_0": [1, 24, 400, 400], "nearest_interp_v2_2.tmp_0": [1, 256, 400, 400],
"nearest_interp_v2_5.tmp_0": [1, 24, 400, 400], "nearest_interp_v2_3.tmp_0": [1, 64, 400, 400],
"nearest_interp_v2_4.tmp_0": [1, 64, 400, 400],
"nearest_interp_v2_5.tmp_0": [1, 64, 400, 400],
"elementwise_add_7": [1, 56, 400, 400], "elementwise_add_7": [1, 56, 400, 400],
"nearest_interp_v2_0.tmp_0": [1, 96, 400, 400] "nearest_interp_v2_0.tmp_0": [1, 256, 400, 400]
} }
opt_input_shape = { opt_input_shape = {
"x": [1, 3, 640, 640], "x": [1, 3, 640, 640],
"conv2d_92.tmp_0": [1, 96, 160, 160], "conv2d_92.tmp_0": [1, 120, 160, 160],
"conv2d_91.tmp_0": [1, 96, 80, 80], "conv2d_91.tmp_0": [1, 24, 80, 80],
"nearest_interp_v2_1.tmp_0": [1, 96, 80, 80], "conv2d_59.tmp_0": [1, 96, 160, 160],
"nearest_interp_v2_2.tmp_0": [1, 96, 160, 160], "nearest_interp_v2_1.tmp_0": [1, 256, 80, 80],
"nearest_interp_v2_3.tmp_0": [1, 24, 160, 160], "nearest_interp_v2_2.tmp_0": [1, 256, 160, 160],
"nearest_interp_v2_4.tmp_0": [1, 24, 160, 160], "conv2d_124.tmp_0": [1, 256, 160, 160],
"nearest_interp_v2_5.tmp_0": [1, 24, 160, 160], "nearest_interp_v2_3.tmp_0": [1, 64, 160, 160],
"nearest_interp_v2_4.tmp_0": [1, 64, 160, 160],
"nearest_interp_v2_5.tmp_0": [1, 64, 160, 160],
"elementwise_add_7": [1, 56, 40, 40], "elementwise_add_7": [1, 56, 40, 40],
"nearest_interp_v2_0.tmp_0": [1, 96, 40, 40] "nearest_interp_v2_0.tmp_0": [1, 256, 40, 40]
} }
if mode == "det" and "server" in model_file_path: min_pact_shape = {
min_input_shape = { "nearest_interp_v2_26.tmp_0":[1,256,20,20],
"x": [1, 3, 50, 50], "nearest_interp_v2_27.tmp_0":[1,64,20,20],
"conv2d_59.tmp_0": [1, 96, 20, 20], "nearest_interp_v2_28.tmp_0":[1,64,20,20],
"nearest_interp_v2_2.tmp_0": [1, 96, 20, 20], "nearest_interp_v2_29.tmp_0":[1,64,20,20]
"nearest_interp_v2_3.tmp_0": [1, 24, 20, 20],
"nearest_interp_v2_4.tmp_0": [1, 24, 20, 20],
"nearest_interp_v2_5.tmp_0": [1, 24, 20, 20]
} }
max_input_shape = { max_pact_shape = {
"x": [1, 3, 2000, 2000], "nearest_interp_v2_26.tmp_0":[1,256,400,400],
"conv2d_59.tmp_0": [1, 96, 400, 400], "nearest_interp_v2_27.tmp_0":[1,64,400,400],
"nearest_interp_v2_2.tmp_0": [1, 96, 400, 400], "nearest_interp_v2_28.tmp_0":[1,64,400,400],
"nearest_interp_v2_3.tmp_0": [1, 24, 400, 400], "nearest_interp_v2_29.tmp_0":[1,64,400,400]
"nearest_interp_v2_4.tmp_0": [1, 24, 400, 400],
"nearest_interp_v2_5.tmp_0": [1, 24, 400, 400]
} }
opt_input_shape = { opt_pact_shape = {
"x": [1, 3, 640, 640], "nearest_interp_v2_26.tmp_0":[1,256,160,160],
"conv2d_59.tmp_0": [1, 96, 160, 160], "nearest_interp_v2_27.tmp_0":[1,64,160,160],
"nearest_interp_v2_2.tmp_0": [1, 96, 160, 160], "nearest_interp_v2_28.tmp_0":[1,64,160,160],
"nearest_interp_v2_3.tmp_0": [1, 24, 160, 160], "nearest_interp_v2_29.tmp_0":[1,64,160,160]
"nearest_interp_v2_4.tmp_0": [1, 24, 160, 160],
"nearest_interp_v2_5.tmp_0": [1, 24, 160, 160]
} }
min_input_shape.update(min_pact_shape)
max_input_shape.update(max_pact_shape)
opt_input_shape.update(opt_pact_shape)
elif mode == "rec": elif mode == "rec":
min_input_shape = {"x": [args.rec_batch_num, 3, 32, 10]} min_input_shape = {"x": [args.rec_batch_num, 3, 32, 10]}
max_input_shape = {"x": [args.rec_batch_num, 3, 32, 2000]} max_input_shape = {"x": [args.rec_batch_num, 3, 32, 2000]}
...@@ -233,10 +259,13 @@ def create_predictor(args, mode, logger): ...@@ -233,10 +259,13 @@ def create_predictor(args, mode, logger):
# enable memory optim # enable memory optim
config.enable_memory_optim() config.enable_memory_optim()
config.disable_glog_info() #config.disable_glog_info()
config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
if mode == 'table':
config.delete_pass("fc_fuse_pass") # not supported for table
config.switch_use_feed_fetch_ops(False) config.switch_use_feed_fetch_ops(False)
config.switch_ir_optim(True)
# create predictor # create predictor
predictor = inference.create_predictor(config) predictor = inference.create_predictor(config)
...@@ -248,7 +277,7 @@ def create_predictor(args, mode, logger): ...@@ -248,7 +277,7 @@ def create_predictor(args, mode, logger):
for output_name in output_names: for output_name in output_names:
output_tensor = predictor.get_output_handle(output_name) output_tensor = predictor.get_output_handle(output_name)
output_tensors.append(output_tensor) output_tensors.append(output_tensor)
return predictor, input_tensor, output_tensors return predictor, input_tensor, output_tensors, config
def draw_e2e_res(dt_boxes, strs, img_path): def draw_e2e_res(dt_boxes, strs, img_path):
...@@ -499,5 +528,40 @@ def draw_boxes(image, boxes, scores=None, drop_score=0.5): ...@@ -499,5 +528,40 @@ def draw_boxes(image, boxes, scores=None, drop_score=0.5):
return image return image
def get_rotate_crop_image(img, points):
    """Cut the quadrilateral ``points`` out of ``img`` and rectify it.

    The four corners are warped onto an upright rectangle whose width is
    the longer of edges 0-1 / 2-3 and whose height is the longer of edges
    0-3 / 1-2. If the rectified crop is at least 1.5 times taller than it
    is wide, it is rotated with ``np.rot90`` before being returned.

    Args:
        img: image array handed to ``cv2.warpPerspective``.
        points: four (x, y) corners (array or nested sequence); corner
            ``i`` is mapped to the i-th corner of
            ``[[0, 0], [w, 0], [w, h], [0, h]]``.

    Returns:
        The rectified (and possibly rotated) crop.

    Raises:
        AssertionError: if ``points`` does not contain exactly four entries.
    """
    assert len(points) == 4, "shape of points must be 4*2"
    # cv2.getPerspectiveTransform only accepts float32 corner arrays, so
    # coerce here instead of failing on int / float64 / list input. An
    # input that is already float32 is passed through unchanged.
    points = np.asarray(points, dtype=np.float32)

    img_crop_width = int(
        max(
            np.linalg.norm(points[0] - points[1]),
            np.linalg.norm(points[2] - points[3])))
    img_crop_height = int(
        max(
            np.linalg.norm(points[0] - points[3]),
            np.linalg.norm(points[1] - points[2])))

    # Destination rectangle corners, in the same order as ``points``.
    pts_std = np.float32([[0, 0], [img_crop_width, 0],
                          [img_crop_width, img_crop_height],
                          [0, img_crop_height]])
    M = cv2.getPerspectiveTransform(points, pts_std)
    dst_img = cv2.warpPerspective(
        img,
        M, (img_crop_width, img_crop_height),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC)

    dst_img_height, dst_img_width = dst_img.shape[0:2]
    if dst_img_height * 1.0 / dst_img_width >= 1.5:
        dst_img = np.rot90(dst_img)
    return dst_img
if __name__ == '__main__': if __name__ == '__main__':
pass pass
...@@ -47,7 +47,7 @@ def main(): ...@@ -47,7 +47,7 @@ def main():
# build model # build model
model = build_model(config['Architecture']) model = build_model(config['Architecture'])
init_model(config, model, logger) init_model(config, model)
# create data ops # create data ops
transforms = [] transforms = []
......
...@@ -61,7 +61,7 @@ def main(): ...@@ -61,7 +61,7 @@ def main():
# build model # build model
model = build_model(config['Architecture']) model = build_model(config['Architecture'])
init_model(config, model, logger) init_model(config, model)
# build post process # build post process
post_process_class = build_post_process(config['PostProcess']) post_process_class = build_post_process(config['PostProcess'])
...@@ -112,4 +112,4 @@ def main(): ...@@ -112,4 +112,4 @@ def main():
if __name__ == '__main__': if __name__ == '__main__':
config, device, logger, vdl_writer = program.preprocess() config, device, logger, vdl_writer = program.preprocess()
main() main()
\ No newline at end of file
...@@ -68,7 +68,7 @@ def main(): ...@@ -68,7 +68,7 @@ def main():
# build model # build model
model = build_model(config['Architecture']) model = build_model(config['Architecture'])
init_model(config, model, logger) init_model(config, model)
# build post process # build post process
post_process_class = build_post_process(config['PostProcess'], post_process_class = build_post_process(config['PostProcess'],
......
...@@ -20,6 +20,7 @@ import numpy as np ...@@ -20,6 +20,7 @@ import numpy as np
import os import os
import sys import sys
import json
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
...@@ -46,12 +47,18 @@ def main(): ...@@ -46,12 +47,18 @@ def main():
# build model # build model
if hasattr(post_process_class, 'character'): if hasattr(post_process_class, 'character'):
config['Architecture']["Head"]['out_channels'] = len( char_num = len(getattr(post_process_class, 'character'))
getattr(post_process_class, 'character')) if config['Architecture']["algorithm"] in ["Distillation",
]: # distillation model
for key in config['Architecture']["Models"]:
config['Architecture']["Models"][key]["Head"][
'out_channels'] = char_num
else: # base rec model
config['Architecture']["Head"]['out_channels'] = char_num
model = build_model(config['Architecture']) model = build_model(config['Architecture'])
init_model(config, model, logger) init_model(config, model)
# create data ops # create data ops
transforms = [] transforms = []
...@@ -107,11 +114,23 @@ def main(): ...@@ -107,11 +114,23 @@ def main():
else: else:
preds = model(images) preds = model(images)
post_result = post_process_class(preds) post_result = post_process_class(preds)
for rec_reuslt in post_result: info = None
logger.info('\t result: {}'.format(rec_reuslt)) if isinstance(post_result, dict):
if len(rec_reuslt) >= 2: rec_info = dict()
fout.write(file + "\t" + rec_reuslt[0] + "\t" + str( for key in post_result:
rec_reuslt[1]) + "\n") if len(post_result[key][0]) >= 2:
rec_info[key] = {
"label": post_result[key][0][0],
"score": post_result[key][0][1],
}
info = json.dumps(rec_info)
else:
if len(post_result[0]) >= 2:
info = post_result[0][0] + "\t" + str(post_result[0][1])
if info is not None:
logger.info("\t result: {}".format(info))
fout.write(file + "\t" + info)
logger.info("success!") logger.info("success!")
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import os
import sys
import json
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
import paddle
from paddle.jit import to_static
from ppocr.data import create_operators, transform
from ppocr.modeling.architectures import build_model
from ppocr.postprocess import build_post_process
from ppocr.utils.save_load import init_model
from ppocr.utils.utility import get_image_file_list
import tools.program as program
import cv2
def main(config, device, logger, vdl_writer):
    """Run table-structure inference on every image listed by ``infer_img``.

    For each image the model prediction is post-processed into a structure
    string (``res_html_code``) and a list of cell boxes (``res_loc``); the
    boxes are scaled to pixel coordinates, clamped to the image, and logged.

    Args:
        config: Nested configuration mapping. Reads ``Global``,
            ``PostProcess``, ``Architecture`` and
            ``Eval.dataset.transforms``. It is modified in place: the head's
            ``out_channels``, the ``KeepKeys`` op and ``Global.infer_mode``
            are overwritten.
        device: Not used by this function; kept for the shared entry-point
            signature.
        logger: Logger used for per-image and final status messages.
        vdl_writer: Not used by this function; kept for the shared
            entry-point signature.
    """
    global_config = config['Global']

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # build model: size the head from the post-processor's character list
    if hasattr(post_process_class, 'character'):
        config['Architecture']["Head"]['out_channels'] = len(
            post_process_class.character)

    model = build_model(config['Architecture'])

    # The other inference scripts touched by this change call
    # init_model(config, model) without a logger; passing one here would
    # bind it to whatever positional parameter follows `model`.
    init_model(config, model)

    # create data ops: drop label-related ops and keep only the image tensor
    transforms = []
    for op in config['Eval']['dataset']['transforms']:
        op_name = list(op)[0]
        if 'Label' in op_name:
            continue
        if op_name == 'KeepKeys':
            op[op_name]['keep_keys'] = ['image']
        transforms.append(op)

    global_config['infer_mode'] = True
    ops = create_operators(transforms, global_config)

    model.eval()
    for file in get_image_file_list(global_config['infer_img']):
        logger.info("infer_img: {}".format(file))
        with open(file, 'rb') as f:
            raw_bytes = f.read()
        batch = transform({'image': raw_bytes}, ops)

        # add a leading batch axis of size 1
        images = paddle.to_tensor(np.expand_dims(batch[0], axis=0))
        preds = model(images)
        post_result = post_process_class(preds)
        res_html_code = post_result['res_html_code']
        res_loc = post_result['res_loc']

        img = cv2.imread(file)
        imgh, imgw = img.shape[0:2]
        res_loc_final = []
        # Box coordinates are treated as fractions of the image size.
        for x0, y0, x1, y1 in res_loc[0]:
            left = max(int(imgw * x0), 0)
            top = max(int(imgh * y0), 0)
            right = min(int(imgw * x1), imgw - 1)
            bottom = min(int(imgh * y1), imgh - 1)
            # NOTE(review): boxes are drawn on `img`, but the annotated image
            # is never written out or returned -- confirm whether it should
            # be saved.
            cv2.rectangle(img, (left, top), (right, bottom), (0, 0, 255), 2)
            res_loc_final.append([left, top, right, bottom])
        logger.info("result: {}, {}".format(res_html_code, res_loc_final))
    logger.info("success!")
# Script entry point: only runs when the file is executed directly.
if __name__ == '__main__':
    # program.preprocess() supplies the four values main() takes as arguments.
    config, device, logger, vdl_writer = program.preprocess()
    main(config, device, logger, vdl_writer)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -186,6 +186,10 @@ def train(config, ...@@ -186,6 +186,10 @@ def train(config,
model.train() model.train()
use_srn = config['Architecture']['algorithm'] == "SRN" use_srn = config['Architecture']['algorithm'] == "SRN"
try:
model_type = config['Architecture']['model_type']
except:
model_type = None
if 'start_epoch' in best_model_dict: if 'start_epoch' in best_model_dict:
start_epoch = best_model_dict['start_epoch'] start_epoch = best_model_dict['start_epoch']
...@@ -208,9 +212,9 @@ def train(config, ...@@ -208,9 +212,9 @@ def train(config,
lr = optimizer.get_lr() lr = optimizer.get_lr()
images = batch[0] images = batch[0]
if use_srn: if use_srn:
others = batch[-4:]
preds = model(images, others)
model_average = True model_average = True
if use_srn or model_type == 'table':
preds = model(images, data=batch[1:])
else: else:
preds = model(images) preds = model(images)
loss = loss_class(preds, batch) loss = loss_class(preds, batch)
...@@ -232,8 +236,11 @@ def train(config, ...@@ -232,8 +236,11 @@ def train(config,
if cal_metric_during_train: # only rec and cls need if cal_metric_during_train: # only rec and cls need
batch = [item.numpy() for item in batch] batch = [item.numpy() for item in batch]
post_result = post_process_class(preds, batch[1]) if model_type == 'table':
eval_class(post_result, batch) eval_class(preds, batch)
else:
post_result = post_process_class(preds, batch[1])
eval_class(post_result, batch)
metric = eval_class.get_metric() metric = eval_class.get_metric()
train_stats.update(metric) train_stats.update(metric)
...@@ -269,6 +276,7 @@ def train(config, ...@@ -269,6 +276,7 @@ def train(config,
valid_dataloader, valid_dataloader,
post_process_class, post_process_class,
eval_class, eval_class,
model_type,
use_srn=use_srn) use_srn=use_srn)
cur_metric_str = 'cur metric, {}'.format(', '.join( cur_metric_str = 'cur metric, {}'.format(', '.join(
['{}: {}'.format(k, v) for k, v in cur_metric.items()])) ['{}: {}'.format(k, v) for k, v in cur_metric.items()]))
...@@ -336,7 +344,11 @@ def train(config, ...@@ -336,7 +344,11 @@ def train(config,
return return
def eval(model, valid_dataloader, post_process_class, eval_class, def eval(model,
valid_dataloader,
post_process_class,
eval_class,
model_type,
use_srn=False): use_srn=False):
model.eval() model.eval()
with paddle.no_grad(): with paddle.no_grad():
...@@ -350,19 +362,19 @@ def eval(model, valid_dataloader, post_process_class, eval_class, ...@@ -350,19 +362,19 @@ def eval(model, valid_dataloader, post_process_class, eval_class,
break break
images = batch[0] images = batch[0]
start = time.time() start = time.time()
if use_srn or model_type == 'table':
if use_srn: preds = model(images, data=batch[1:])
others = batch[-4:]
preds = model(images, others)
else: else:
preds = model(images) preds = model(images)
batch = [item.numpy() for item in batch] batch = [item.numpy() for item in batch]
# Obtain usable results from post-processing methods # Obtain usable results from post-processing methods
post_result = post_process_class(preds, batch[1])
total_time += time.time() - start total_time += time.time() - start
# Evaluate the results of the current batch # Evaluate the results of the current batch
eval_class(post_result, batch) if model_type == 'table':
eval_class(preds, batch)
else:
post_result = post_process_class(preds, batch[1])
eval_class(post_result, batch)
pbar.update(1) pbar.update(1)
total_frame += len(images) total_frame += len(images)
# Get final metric,eg. acc or hmean # Get final metric,eg. acc or hmean
...@@ -386,7 +398,7 @@ def preprocess(is_train=False): ...@@ -386,7 +398,7 @@ def preprocess(is_train=False):
alg = config['Architecture']['algorithm'] alg = config['Architecture']['algorithm']
assert alg in [ assert alg in [
'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN', 'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN',
'CLS', 'PGNet' 'CLS', 'PGNet', 'Distillation', 'TableAttn'
] ]
device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu' device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu'
......
...@@ -35,7 +35,7 @@ from ppocr.losses import build_loss ...@@ -35,7 +35,7 @@ from ppocr.losses import build_loss
from ppocr.optimizer import build_optimizer from ppocr.optimizer import build_optimizer
from ppocr.postprocess import build_post_process from ppocr.postprocess import build_post_process
from ppocr.metrics import build_metric from ppocr.metrics import build_metric
from ppocr.utils.save_load import init_model from ppocr.utils.save_load import init_model, load_dygraph_params
import tools.program as program import tools.program as program
dist.get_world_size() dist.get_world_size()
...@@ -72,7 +72,14 @@ def main(config, device, logger, vdl_writer): ...@@ -72,7 +72,14 @@ def main(config, device, logger, vdl_writer):
# for rec algorithm # for rec algorithm
if hasattr(post_process_class, 'character'): if hasattr(post_process_class, 'character'):
char_num = len(getattr(post_process_class, 'character')) char_num = len(getattr(post_process_class, 'character'))
config['Architecture']["Head"]['out_channels'] = char_num if config['Architecture']["algorithm"] in ["Distillation",
]: # distillation model
for key in config['Architecture']["Models"]:
config['Architecture']["Models"][key]["Head"][
'out_channels'] = char_num
else: # base rec model
config['Architecture']["Head"]['out_channels'] = char_num
model = build_model(config['Architecture']) model = build_model(config['Architecture'])
if config['Global']['distributed']: if config['Global']['distributed']:
model = paddle.DataParallel(model) model = paddle.DataParallel(model)
...@@ -90,8 +97,7 @@ def main(config, device, logger, vdl_writer): ...@@ -90,8 +97,7 @@ def main(config, device, logger, vdl_writer):
# build metric # build metric
eval_class = build_metric(config['Metric']) eval_class = build_metric(config['Metric'])
# load pretrain model # load pretrain model
pre_best_model_dict = init_model(config, model, logger, optimizer) pre_best_model_dict = load_dygraph_params(config, model, logger, optimizer)
logger.info('train dataloader has {} iters'.format(len(train_dataloader))) logger.info('train dataloader has {} iters'.format(len(train_dataloader)))
if valid_dataloader is not None: if valid_dataloader is not None:
logger.info('valid dataloader has {} iters'.format( logger.info('valid dataloader has {} iters'.format(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment