Commit 58ef7acb authored by LDOUBLEV's avatar LDOUBLEV
Browse files

Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into fix_cpp

parents 38339287 5c664bf4
...@@ -26,12 +26,14 @@ def build_head(config): ...@@ -26,12 +26,14 @@ def build_head(config):
from .rec_ctc_head import CTCHead from .rec_ctc_head import CTCHead
from .rec_att_head import AttentionHead from .rec_att_head import AttentionHead
from .rec_srn_head import SRNHead from .rec_srn_head import SRNHead
from .rec_nrtr_head import Transformer
# cls head # cls head
from .cls_head import ClsHead from .cls_head import ClsHead
support_dict = [ support_dict = [
'DBHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead', 'AttentionHead', 'DBHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead', 'AttentionHead',
'SRNHead', 'PGHead', 'TableAttentionHead'] 'SRNHead', 'PGHead', 'Transformer', 'TableAttentionHead'
]
#table head #table head
from .table_att_head import TableAttentionHead from .table_att_head import TableAttentionHead
......
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle.nn import Linear
from paddle.nn.initializer import XavierUniform as xavier_uniform_
from paddle.nn.initializer import Constant as constant_
from paddle.nn.initializer import XavierNormal as xavier_normal_
zeros_ = constant_(value=0.)
ones_ = constant_(value=1.)
class MultiheadAttention(nn.Layer):
"""Allows the model to jointly attend to information
from different representation subspaces.
See reference: Attention Is All You Need
.. math::
\text{MultiHead}(Q, K, V) = \text{Concat}(head_1,\dots,head_h)W^O
\text{where} head_i = \text{Attention}(QW_i^Q, KW_i^K, VW_i^V)
Args:
embed_dim: total dimension of the model
num_heads: parallel attention layers, or heads
"""
def __init__(self,
embed_dim,
num_heads,
dropout=0.,
bias=True,
add_bias_kv=False,
add_zero_attn=False):
super(MultiheadAttention, self).__init__()
self.embed_dim = embed_dim
self.num_heads = num_heads
self.dropout = dropout
self.head_dim = embed_dim // num_heads
assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"
self.scaling = self.head_dim**-0.5
self.out_proj = Linear(embed_dim, embed_dim, bias_attr=bias)
self._reset_parameters()
self.conv1 = paddle.nn.Conv2D(
in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1))
self.conv2 = paddle.nn.Conv2D(
in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1))
self.conv3 = paddle.nn.Conv2D(
in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1))
def _reset_parameters(self):
xavier_uniform_(self.out_proj.weight)
def forward(self,
query,
key,
value,
key_padding_mask=None,
incremental_state=None,
need_weights=True,
static_kv=False,
attn_mask=None):
"""
Inputs of forward function
query: [target length, batch size, embed dim]
key: [sequence length, batch size, embed dim]
value: [sequence length, batch size, embed dim]
key_padding_mask: if True, mask padding based on batch size
incremental_state: if provided, previous time steps are cashed
need_weights: output attn_output_weights
static_kv: key and value are static
Outputs of forward function
attn_output: [target length, batch size, embed dim]
attn_output_weights: [batch size, target length, sequence length]
"""
tgt_len, bsz, embed_dim = query.shape
assert embed_dim == self.embed_dim
assert list(query.shape) == [tgt_len, bsz, embed_dim]
assert key.shape == value.shape
q = self._in_proj_q(query)
k = self._in_proj_k(key)
v = self._in_proj_v(value)
q *= self.scaling
q = q.reshape([tgt_len, bsz * self.num_heads, self.head_dim]).transpose(
[1, 0, 2])
k = k.reshape([-1, bsz * self.num_heads, self.head_dim]).transpose(
[1, 0, 2])
v = v.reshape([-1, bsz * self.num_heads, self.head_dim]).transpose(
[1, 0, 2])
src_len = k.shape[1]
if key_padding_mask is not None:
assert key_padding_mask.shape[0] == bsz
assert key_padding_mask.shape[1] == src_len
attn_output_weights = paddle.bmm(q, k.transpose([0, 2, 1]))
assert list(attn_output_weights.
shape) == [bsz * self.num_heads, tgt_len, src_len]
if attn_mask is not None:
attn_mask = attn_mask.unsqueeze(0)
attn_output_weights += attn_mask
if key_padding_mask is not None:
attn_output_weights = attn_output_weights.reshape(
[bsz, self.num_heads, tgt_len, src_len])
key = key_padding_mask.unsqueeze(1).unsqueeze(2).astype('float32')
y = paddle.full(shape=key.shape, dtype='float32', fill_value='-inf')
y = paddle.where(key == 0., key, y)
attn_output_weights += y
attn_output_weights = attn_output_weights.reshape(
[bsz * self.num_heads, tgt_len, src_len])
attn_output_weights = F.softmax(
attn_output_weights.astype('float32'),
axis=-1,
dtype=paddle.float32 if attn_output_weights.dtype == paddle.float16
else attn_output_weights.dtype)
attn_output_weights = F.dropout(
attn_output_weights, p=self.dropout, training=self.training)
attn_output = paddle.bmm(attn_output_weights, v)
assert list(attn_output.
shape) == [bsz * self.num_heads, tgt_len, self.head_dim]
attn_output = attn_output.transpose([1, 0, 2]).reshape(
[tgt_len, bsz, embed_dim])
attn_output = self.out_proj(attn_output)
if need_weights:
# average attention weights over heads
attn_output_weights = attn_output_weights.reshape(
[bsz, self.num_heads, tgt_len, src_len])
attn_output_weights = attn_output_weights.sum(
axis=1) / self.num_heads
else:
attn_output_weights = None
return attn_output, attn_output_weights
def _in_proj_q(self, query):
query = query.transpose([1, 2, 0])
query = paddle.unsqueeze(query, axis=2)
res = self.conv1(query)
res = paddle.squeeze(res, axis=2)
res = res.transpose([2, 0, 1])
return res
def _in_proj_k(self, key):
key = key.transpose([1, 2, 0])
key = paddle.unsqueeze(key, axis=2)
res = self.conv2(key)
res = paddle.squeeze(res, axis=2)
res = res.transpose([2, 0, 1])
return res
def _in_proj_v(self, value):
value = value.transpose([1, 2, 0]) #(1, 2, 0)
value = paddle.unsqueeze(value, axis=2)
res = self.conv3(value)
res = paddle.squeeze(res, axis=2)
res = res.transpose([2, 0, 1])
return res
This diff is collapsed.
...@@ -24,18 +24,16 @@ __all__ = ['build_post_process'] ...@@ -24,18 +24,16 @@ __all__ = ['build_post_process']
from .db_postprocess import DBPostProcess, DistillationDBPostProcess from .db_postprocess import DBPostProcess, DistillationDBPostProcess
from .east_postprocess import EASTPostProcess from .east_postprocess import EASTPostProcess
from .sast_postprocess import SASTPostProcess from .sast_postprocess import SASTPostProcess
from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, \ from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, NRTRLabelDecode, \
TableLabelDecode TableLabelDecode
from .cls_postprocess import ClsPostProcess from .cls_postprocess import ClsPostProcess
from .pg_postprocess import PGPostProcess from .pg_postprocess import PGPostProcess
def build_post_process(config, global_config=None): def build_post_process(config, global_config=None):
support_dict = [ support_dict = [
'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode', 'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode',
'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess', 'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess',
'DistillationCTCLabelDecode', 'TableLabelDecode', 'DistillationCTCLabelDecode', 'NRTRLabelDecode', 'TableLabelDecode', 'DistillationDBPostProcess'
'DistillationDBPostProcess'
] ]
config = copy.deepcopy(config) config = copy.deepcopy(config)
......
...@@ -156,6 +156,69 @@ class DistillationCTCLabelDecode(CTCLabelDecode): ...@@ -156,6 +156,69 @@ class DistillationCTCLabelDecode(CTCLabelDecode):
return output return output
class NRTRLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """
def __init__(self,
character_dict_path=None,
character_type='EN_symbol',
use_space_char=True,
**kwargs):
super(NRTRLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char)
def __call__(self, preds, label=None, *args, **kwargs):
if preds.dtype == paddle.int64:
if isinstance(preds, paddle.Tensor):
preds = preds.numpy()
if preds[0][0]==2:
preds_idx = preds[:,1:]
else:
preds_idx = preds
text = self.decode(preds_idx)
if label is None:
return text
label = self.decode(label[:,1:])
else:
if isinstance(preds, paddle.Tensor):
preds = preds.numpy()
preds_idx = preds.argmax(axis=2)
preds_prob = preds.max(axis=2)
text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
if label is None:
return text
label = self.decode(label[:,1:])
return text, label
def add_special_char(self, dict_character):
dict_character = ['blank','<unk>','<s>','</s>'] + dict_character
return dict_character
def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
""" convert text-index into text-label. """
result_list = []
batch_size = len(text_index)
for batch_idx in range(batch_size):
char_list = []
conf_list = []
for idx in range(len(text_index[batch_idx])):
if text_index[batch_idx][idx] == 3: # end
break
try:
char_list.append(self.character[int(text_index[batch_idx][idx])])
except:
continue
if text_prob is not None:
conf_list.append(text_prob[batch_idx][idx])
else:
conf_list.append(1)
text = ''.join(char_list)
result_list.append((text.lower(), np.mean(conf_list)))
return result_list
class AttnLabelDecode(BaseRecLabelDecode): class AttnLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """ """ Convert between text-label and text-index """
...@@ -193,8 +256,7 @@ class AttnLabelDecode(BaseRecLabelDecode): ...@@ -193,8 +256,7 @@ class AttnLabelDecode(BaseRecLabelDecode):
if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ if idx > 0 and text_index[batch_idx][idx - 1] == text_index[
batch_idx][idx]: batch_idx][idx]:
continue continue
char_list.append(self.character[int(text_index[batch_idx][ char_list.append(self.character[int(text_index[batch_idx][idx])])
idx])])
if text_prob is not None: if text_prob is not None:
conf_list.append(text_prob[batch_idx][idx]) conf_list.append(text_prob[batch_idx][idx])
else: else:
......
...@@ -49,4 +49,19 @@ inference:tools/infer/predict_det.py ...@@ -49,4 +49,19 @@ inference:tools/infer/predict_det.py
--save_log_path:null --save_log_path:null
--benchmark:True --benchmark:True
null:null null:null
===========================cpp_infer_params===========================
use_opencv:True
infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/
infer_quant:False
inference:./deploy/cpp_infer/build/ppocr det
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False|True
--precision:fp32|fp16
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
--save_log_path:null
--benchmark:True
#!/bin/bash #!/bin/bash
FILENAME=$1 FILENAME=$1
# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer'] # MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer', 'cpp_infer']
MODE=$2 MODE=$2
dataline=$(cat ${FILENAME}) dataline=$(cat ${FILENAME})
...@@ -59,7 +59,7 @@ elif [ ${MODE} = "whole_infer" ];then ...@@ -59,7 +59,7 @@ elif [ ${MODE} = "whole_infer" ];then
cd ./train_data/ && tar xf icdar2015_infer.tar && tar xf ic15_data.tar cd ./train_data/ && tar xf icdar2015_infer.tar && tar xf ic15_data.tar
ln -s ./icdar2015_infer ./icdar2015 ln -s ./icdar2015_infer ./icdar2015
cd ../ cd ../
else elif [ ${MODE} = "infer" ] || [ ${MODE} = "cpp_infer" ];then
if [ ${model_name} = "ocr_det" ]; then if [ ${model_name} = "ocr_det" ]; then
eval_model_name="ch_ppocr_mobile_v2.0_det_infer" eval_model_name="ch_ppocr_mobile_v2.0_det_infer"
rm -rf ./train_data/icdar2015 rm -rf ./train_data/icdar2015
...@@ -79,3 +79,72 @@ else ...@@ -79,3 +79,72 @@ else
fi fi
fi fi
if [ ${MODE} = "cpp_infer" ];then
cd deploy/cpp_infer
use_opencv=$(func_parser_value "${lines[52]}")
if [ ${use_opencv} = "True" ]; then
echo "################### build opencv ###################"
rm -rf 3.4.7.tar.gz opencv-3.4.7/
wget https://github.com/opencv/opencv/archive/3.4.7.tar.gz
tar -xf 3.4.7.tar.gz
cd opencv-3.4.7/
install_path=$(pwd)/opencv-3.4.7/opencv3
rm -rf build
mkdir build
cd build
cmake .. \
-DCMAKE_INSTALL_PREFIX=${install_path} \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=OFF \
-DWITH_IPP=OFF \
-DBUILD_IPP_IW=OFF \
-DWITH_LAPACK=OFF \
-DWITH_EIGEN=OFF \
-DCMAKE_INSTALL_LIBDIR=lib64 \
-DWITH_ZLIB=ON \
-DBUILD_ZLIB=ON \
-DWITH_JPEG=ON \
-DBUILD_JPEG=ON \
-DWITH_PNG=ON \
-DBUILD_PNG=ON \
-DWITH_TIFF=ON \
-DBUILD_TIFF=ON
make -j
make install
cd ../
echo "################### build opencv finished ###################"
fi
echo "################### build PaddleOCR demo ####################"
if [ ${use_opencv} = "True" ]; then
OPENCV_DIR=$(pwd)/opencv-3.4.7/opencv3/
else
OPENCV_DIR=''
fi
LIB_DIR=$(pwd)/Paddle/build/paddle_inference_install_dir/
CUDA_LIB_DIR=$(dirname `find /usr -name libcudart.so`)
CUDNN_LIB_DIR=$(dirname `find /usr -name libcudnn.so`)
BUILD_DIR=build
rm -rf ${BUILD_DIR}
mkdir ${BUILD_DIR}
cd ${BUILD_DIR}
cmake .. \
-DPADDLE_LIB=${LIB_DIR} \
-DWITH_MKL=ON \
-DWITH_GPU=OFF \
-DWITH_STATIC_LIB=OFF \
-DWITH_TENSORRT=OFF \
-DOPENCV_DIR=${OPENCV_DIR} \
-DCUDNN_LIB=${CUDNN_LIB_DIR} \
-DCUDA_LIB=${CUDA_LIB_DIR} \
-DTENSORRT_DIR=${TENSORRT_DIR} \
make -j
echo "################### build PaddleOCR demo finished ###################"
fi
\ No newline at end of file
#!/bin/bash #!/bin/bash
FILENAME=$1 FILENAME=$1
# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer'] # MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer', 'cpp_infer']
MODE=$2 MODE=$2
dataline=$(cat ${FILENAME}) dataline=$(cat ${FILENAME})
...@@ -145,6 +145,33 @@ benchmark_value=$(func_parser_value "${lines[49]}") ...@@ -145,6 +145,33 @@ benchmark_value=$(func_parser_value "${lines[49]}")
infer_key1=$(func_parser_key "${lines[50]}") infer_key1=$(func_parser_key "${lines[50]}")
infer_value1=$(func_parser_value "${lines[50]}") infer_value1=$(func_parser_value "${lines[50]}")
if [ ${MODE} = "cpp_infer" ]; then
# parser cpp inference model
cpp_infer_model_dir_list=$(func_parser_value "${lines[53]}")
cpp_infer_is_quant=$(func_parser_value "${lines[54]}")
# parser cpp inference
inference_cmd=$(func_parser_value "${lines[55]}")
cpp_use_gpu_key=$(func_parser_key "${lines[56]}")
cpp_use_gpu_list=$(func_parser_value "${lines[56]}")
cpp_use_mkldnn_key=$(func_parser_key "${lines[57]}")
cpp_use_mkldnn_list=$(func_parser_value "${lines[57]}")
cpp_cpu_threads_key=$(func_parser_key "${lines[58]}")
cpp_cpu_threads_list=$(func_parser_value "${lines[58]}")
cpp_batch_size_key=$(func_parser_key "${lines[59]}")
cpp_batch_size_list=$(func_parser_value "${lines[59]}")
cpp_use_trt_key=$(func_parser_key "${lines[60]}")
cpp_use_trt_list=$(func_parser_value "${lines[60]}")
cpp_precision_key=$(func_parser_key "${lines[61]}")
cpp_precision_list=$(func_parser_value "${lines[61]}")
cpp_infer_model_key=$(func_parser_key "${lines[62]}")
cpp_image_dir_key=$(func_parser_key "${lines[63]}")
cpp_infer_img_dir=$(func_parser_value "${lines[63]}")
cpp_save_log_key=$(func_parser_key "${lines[64]}")
cpp_benchmark_key=$(func_parser_key "${lines[65]}")
cpp_benchmark_value=$(func_parser_value "${lines[65]}")
fi
LOG_PATH="./tests/output" LOG_PATH="./tests/output"
mkdir -p ${LOG_PATH} mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results.log" status_log="${LOG_PATH}/results.log"
...@@ -218,6 +245,71 @@ function func_inference(){ ...@@ -218,6 +245,71 @@ function func_inference(){
done done
} }
function func_cpp_inference(){
IFS='|'
_script=$1
_model_dir=$2
_log_path=$3
_img_dir=$4
_flag_quant=$5
# inference
for use_gpu in ${cpp_use_gpu_list[*]}; do
if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then
for use_mkldnn in ${cpp_use_mkldnn_list[*]}; do
if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then
continue
fi
for threads in ${cpp_cpu_threads_list[*]}; do
for batch_size in ${cpp_batch_size_list[*]}; do
_save_log_path="${_log_path}/cpp_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_${batch_size}.log"
set_infer_data=$(func_set_params "${cpp_image_dir_key}" "${_img_dir}")
set_benchmark=$(func_set_params "${cpp_benchmark_key}" "${cpp_benchmark_value}")
set_batchsize=$(func_set_params "${cpp_batch_size_key}" "${batch_size}")
set_cpu_threads=$(func_set_params "${cpp_cpu_threads_key}" "${threads}")
set_model_dir=$(func_set_params "${cpp_infer_model_key}" "${_model_dir}")
command="${_script} ${cpp_use_gpu_key}=${use_gpu} ${cpp_use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} > ${_save_log_path} 2>&1 "
eval $command
last_status=${PIPESTATUS[0]}
eval "cat ${_save_log_path}"
status_check $last_status "${command}" "${status_log}"
done
done
done
elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then
for use_trt in ${cpp_use_trt_list[*]}; do
for precision in ${cpp_precision_list[*]}; do
if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then
continue
fi
if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
continue
fi
if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [ ${_flag_quant} = "True" ]; then
continue
fi
for batch_size in ${cpp_batch_size_list[*]}; do
_save_log_path="${_log_path}/cpp_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
set_infer_data=$(func_set_params "${cpp_image_dir_key}" "${_img_dir}")
set_benchmark=$(func_set_params "${cpp_benchmark_key}" "${cpp_benchmark_value}")
set_batchsize=$(func_set_params "${cpp_batch_size_key}" "${batch_size}")
set_tensorrt=$(func_set_params "${cpp_use_trt_key}" "${use_trt}")
set_precision=$(func_set_params "${cpp_precision_key}" "${precision}")
set_model_dir=$(func_set_params "${cpp_infer_model_key}" "${_model_dir}")
command="${_script} ${cpp_use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} > ${_save_log_path} 2>&1 "
eval $command
last_status=${PIPESTATUS[0]}
eval "cat ${_save_log_path}"
status_check $last_status "${command}" "${status_log}"
done
done
done
else
echo "Does not support hardware other than CPU and GPU Currently!"
fi
done
}
if [ ${MODE} = "infer" ]; then if [ ${MODE} = "infer" ]; then
GPUID=$3 GPUID=$3
if [ ${#GPUID} -le 0 ];then if [ ${#GPUID} -le 0 ];then
...@@ -252,6 +344,25 @@ if [ ${MODE} = "infer" ]; then ...@@ -252,6 +344,25 @@ if [ ${MODE} = "infer" ]; then
Count=$(($Count + 1)) Count=$(($Count + 1))
done done
elif [ ${MODE} = "cpp_infer" ]; then
GPUID=$3
if [ ${#GPUID} -le 0 ];then
env=" "
else
env="export CUDA_VISIBLE_DEVICES=${GPUID}"
fi
# set CUDA_VISIBLE_DEVICES
eval $env
export Count=0
IFS="|"
infer_quant_flag=(${cpp_infer_is_quant})
for infer_model in ${cpp_infer_model_dir_list[*]}; do
#run inference
is_quant=${infer_quant_flag[Count]}
func_cpp_inference "${inference_cmd}" "${infer_model}" "${LOG_PATH}" "${cpp_infer_img_dir}" ${is_quant}
Count=$(($Count + 1))
done
else else
IFS="|" IFS="|"
export Count=0 export Count=0
......
...@@ -74,7 +74,7 @@ class TextE2E(object): ...@@ -74,7 +74,7 @@ class TextE2E(object):
self.preprocess_op = create_operators(pre_process_list) self.preprocess_op = create_operators(pre_process_list)
self.postprocess_op = build_post_process(postprocess_params) self.postprocess_op = build_post_process(postprocess_params)
self.predictor, self.input_tensor, self.output_tensors = utility.create_predictor( self.predictor, self.input_tensor, self.output_tensors, _ = utility.create_predictor(
args, 'e2e', logger) # paddle.jit.load(args.det_model_dir) args, 'e2e', logger) # paddle.jit.load(args.det_model_dir)
# self.predictor.eval() # self.predictor.eval()
......
...@@ -88,7 +88,7 @@ class TextRecognizer(object): ...@@ -88,7 +88,7 @@ class TextRecognizer(object):
def resize_norm_img(self, img, max_wh_ratio): def resize_norm_img(self, img, max_wh_ratio):
imgC, imgH, imgW = self.rec_image_shape imgC, imgH, imgW = self.rec_image_shape
assert imgC == img.shape[2] assert imgC == img.shape[2]
if self.character_type == "ch": max_wh_ratio = max(max_wh_ratio, imgW / imgH)
imgW = int((32 * max_wh_ratio)) imgW = int((32 * max_wh_ratio))
h, w = img.shape[:2] h, w = img.shape[:2]
ratio = w / float(h) ratio = w / float(h)
...@@ -278,7 +278,7 @@ def main(args): ...@@ -278,7 +278,7 @@ def main(args):
if args.warmup: if args.warmup:
img = np.random.uniform(0, 255, [32, 320, 3]).astype(np.uint8) img = np.random.uniform(0, 255, [32, 320, 3]).astype(np.uint8)
for i in range(2): for i in range(2):
res = text_recognizer([img]) res = text_recognizer([img] * int(args.rec_batch_num))
for image_file in image_file_list: for image_file in image_file_list:
img, flag = check_and_read_gif(image_file) img, flag = check_and_read_gif(image_file)
......
...@@ -186,6 +186,8 @@ def train(config, ...@@ -186,6 +186,8 @@ def train(config,
model.train() model.train()
use_srn = config['Architecture']['algorithm'] == "SRN" use_srn = config['Architecture']['algorithm'] == "SRN"
use_nrtr = config['Architecture']['algorithm'] == "NRTR"
try: try:
model_type = config['Architecture']['model_type'] model_type = config['Architecture']['model_type']
except: except:
...@@ -213,7 +215,7 @@ def train(config, ...@@ -213,7 +215,7 @@ def train(config,
images = batch[0] images = batch[0]
if use_srn: if use_srn:
model_average = True model_average = True
if use_srn or model_type == 'table': if use_srn or model_type == 'table' or use_nrtr:
preds = model(images, data=batch[1:]) preds = model(images, data=batch[1:])
else: else:
preds = model(images) preds = model(images)
...@@ -398,7 +400,7 @@ def preprocess(is_train=False): ...@@ -398,7 +400,7 @@ def preprocess(is_train=False):
alg = config['Architecture']['algorithm'] alg = config['Architecture']['algorithm']
assert alg in [ assert alg in [
'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN', 'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN',
'CLS', 'PGNet', 'Distillation', 'TableAttn' 'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn'
] ]
device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu' device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu'
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment