wangsen / paddle_dbnet · Commits

Commit 253b8453, authored Jun 28, 2021 by Leif
Merge remote-tracking branch 'origin/dygraph' into dygraph
Parents: 7cad4817, bc999986
Showing 13 changed files with 836 additions and 122 deletions (+836 / -122).
tools/infer/benchmark_utils.py   +232  -0
tools/infer/predict_cls.py        +18  -3
tools/infer/predict_det.py        +23  -10
tools/infer/predict_rec.py        +78  -40
tools/infer/predict_system.py     +96  -9
tools/infer/utility.py           +219  -32
tools/infer_cls.py                 +1  -1
tools/infer_det.py                 +2  -2
tools/infer_e2e.py                 +1  -1
tools/infer_rec.py                +27  -8
tools/infer_table.py             +107  -0
tools/program.py                  +23  -14
tools/train.py                     +9  -2
tools/infer/benchmark_utils.py (new file, mode 100644):

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os
import time
import logging

import paddle
import paddle.inference as paddle_infer

from pathlib import Path

CUR_DIR = os.path.dirname(os.path.abspath(__file__))


class PaddleInferBenchmark(object):
    def __init__(self,
                 config,
                 model_info: dict={},
                 data_info: dict={},
                 perf_info: dict={},
                 resource_info: dict={},
                 save_log_path: str="",
                 **kwargs):
        """
        Construct PaddleInferBenchmark Class to format logs.
        args:
            config(paddle.inference.Config): paddle inference config
            model_info(dict): basic model info
                {'model_name': 'resnet50'
                 'precision': 'fp32'}
            data_info(dict): input data info
                {'batch_size': 1
                 'shape': '3,224,224'
                 'data_num': 1000}
            perf_info(dict): performance result
                {'preprocess_time_s': 1.0
                 'inference_time_s': 2.0
                 'postprocess_time_s': 1.0
                 'total_time_s': 4.0}
            resource_info(dict):
                cpu and gpu resources
                {'cpu_rss': 100
                 'gpu_rss': 100
                 'gpu_util': 60}
        """
        # PaddleInferBenchmark Log Version
        self.log_version = 1.0

        # Paddle Version
        self.paddle_version = paddle.__version__
        self.paddle_commit = paddle.__git_commit__
        paddle_infer_info = paddle_infer.get_version()
        self.paddle_branch = paddle_infer_info.strip().split(': ')[-1]

        # model info
        self.model_info = model_info

        # data info
        self.data_info = data_info

        # perf info
        self.perf_info = perf_info

        try:
            self.model_name = model_info['model_name']
            self.precision = model_info['precision']

            self.batch_size = data_info['batch_size']
            self.shape = data_info['shape']
            self.data_num = data_info['data_num']

            self.preprocess_time_s = round(perf_info['preprocess_time_s'], 4)
            self.inference_time_s = round(perf_info['inference_time_s'], 4)
            self.postprocess_time_s = round(perf_info['postprocess_time_s'], 4)
            self.total_time_s = round(perf_info['total_time_s'], 4)
        except:
            self.print_help()
            raise ValueError(
                "Set argument wrong, please check input argument and its type")

        # conf info
        self.config_status = self.parse_config(config)
        self.save_log_path = save_log_path

        # mem info
        if isinstance(resource_info, dict):
            self.cpu_rss_mb = int(resource_info.get('cpu_rss_mb', 0))
            self.gpu_rss_mb = int(resource_info.get('gpu_rss_mb', 0))
            self.gpu_util = round(resource_info.get('gpu_util', 0), 2)
        else:
            self.cpu_rss_mb = 0
            self.gpu_rss_mb = 0
            self.gpu_util = 0

        # init benchmark logger
        self.benchmark_logger()

    def benchmark_logger(self):
        """
        benchmark logger
        """
        # Init logger
        FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        log_output = f"{self.save_log_path}/{self.model_name}.log"
        Path(f"{self.save_log_path}").mkdir(parents=True, exist_ok=True)
        logging.basicConfig(
            level=logging.INFO,
            format=FORMAT,
            handlers=[
                logging.FileHandler(
                    filename=log_output, mode='w'),
                logging.StreamHandler(),
            ])
        self.logger = logging.getLogger(__name__)
        self.logger.info(
            f"Paddle Inference benchmark log will be saved to {log_output}")

    def parse_config(self, config) -> dict:
        """
        parse paddle predictor config
        args:
            config(paddle.inference.Config): paddle inference config
        return:
            config_status(dict): dict style config info
        """
        config_status = {}
        config_status['runtime_device'] = "gpu" if config.use_gpu() else "cpu"
        config_status['ir_optim'] = config.ir_optim()
        config_status['enable_tensorrt'] = config.tensorrt_engine_enabled()
        config_status['precision'] = self.precision
        config_status['enable_mkldnn'] = config.mkldnn_enabled()
        config_status[
            'cpu_math_library_num_threads'] = config.cpu_math_library_num_threads(
            )
        return config_status

    def report(self, identifier=None):
        """
        print log report
        args:
            identifier(string): identify log
        """
        if identifier:
            identifier = f"[{identifier}]"
        else:
            identifier = ""

        self.logger.info("\n")
        self.logger.info(
            "---------------------- Paddle info ----------------------")
        self.logger.info(f"{identifier} paddle_version: {self.paddle_version}")
        self.logger.info(f"{identifier} paddle_commit: {self.paddle_commit}")
        self.logger.info(f"{identifier} paddle_branch: {self.paddle_branch}")
        self.logger.info(f"{identifier} log_api_version: {self.log_version}")
        self.logger.info(
            "----------------------- Conf info -----------------------")
        self.logger.info(
            f"{identifier} runtime_device: {self.config_status['runtime_device']}")
        self.logger.info(f"{identifier} ir_optim: {self.config_status['ir_optim']}")
        self.logger.info(f"{identifier} enable_memory_optim: {True}")
        self.logger.info(
            f"{identifier} enable_tensorrt: {self.config_status['enable_tensorrt']}")
        self.logger.info(
            f"{identifier} enable_mkldnn: {self.config_status['enable_mkldnn']}")
        self.logger.info(
            f"{identifier} cpu_math_library_num_threads: {self.config_status['cpu_math_library_num_threads']}")
        self.logger.info(
            "----------------------- Model info ----------------------")
        self.logger.info(f"{identifier} model_name: {self.model_name}")
        self.logger.info(f"{identifier} precision: {self.precision}")
        self.logger.info(
            "----------------------- Data info -----------------------")
        self.logger.info(f"{identifier} batch_size: {self.batch_size}")
        self.logger.info(f"{identifier} input_shape: {self.shape}")
        self.logger.info(f"{identifier} data_num: {self.data_num}")
        self.logger.info(
            "----------------------- Perf info -----------------------")
        self.logger.info(
            f"{identifier} cpu_rss(MB): {self.cpu_rss_mb}, gpu_rss(MB): {self.gpu_rss_mb}, gpu_util: {self.gpu_util}%")
        self.logger.info(
            f"{identifier} total time spent(s): {self.total_time_s}")
        self.logger.info(
            f"{identifier} preprocess_time(ms): {round(self.preprocess_time_s * 1000, 1)}, inference_time(ms): {round(self.inference_time_s * 1000, 1)}, postprocess_time(ms): {round(self.postprocess_time_s * 1000, 1)}")

    def print_help(self):
        """
        print function help
        """
        print("""Usage:
            ==== Print inference benchmark logs. ====
            config = paddle.inference.Config()
            model_info = {'model_name': 'resnet50'
                          'precision': 'fp32'}
            data_info = {'batch_size': 1
                         'shape': '3,224,224'
                         'data_num': 1000}
            perf_info = {'preprocess_time_s': 1.0
                         'inference_time_s': 2.0
                         'postprocess_time_s': 1.0
                         'total_time_s': 4.0}
            resource_info = {'cpu_rss_mb': 100
                             'gpu_rss_mb': 100
                             'gpu_util': 60}
            log = PaddleInferBenchmark(config, model_info, data_info, perf_info, resource_info)
            log('Test')
            """)

    def __call__(self, identifier=None):
        """
        __call__
        args:
            identifier(string): identify log
        """
        self.report(identifier)
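For orientation, here is a minimal usage sketch of the class added above. The config construction, the model name and all numeric values are placeholders for illustration rather than values from this commit; the dictionary keys follow the docstring.

# Minimal usage sketch (placeholder values; keys as documented above).
import paddle.inference as paddle_infer
from tools.infer.benchmark_utils import PaddleInferBenchmark

config = paddle_infer.Config("inference.pdmodel", "inference.pdiparams")
model_info = {'model_name': 'rec_model', 'precision': 'fp32'}
data_info = {'batch_size': 6, 'shape': 'dynamic_shape', 'data_num': 100}
perf_info = {'preprocess_time_s': 0.10, 'inference_time_s': 0.50,
             'postprocess_time_s': 0.05, 'total_time_s': 0.65}
resource_info = {'cpu_rss_mb': 500, 'gpu_rss_mb': 1200, 'gpu_util': 60}

log = PaddleInferBenchmark(config, model_info, data_info, perf_info,
                           resource_info, save_log_path="./log_output")
log("Rec")  # writes the formatted report to ./log_output/rec_model.log and stdout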
tools/infer/predict_cls.py:

@@ -45,9 +45,11 @@ class TextClassifier(object):
             "label_list": args.label_list,
         }
         self.postprocess_op = build_post_process(postprocess_params)
-        self.predictor, self.input_tensor, self.output_tensors = \
+        self.predictor, self.input_tensor, self.output_tensors, _ = \
             utility.create_predictor(args, 'cls', logger)
+        self.cls_times = utility.Timer()

     def resize_norm_img(self, img):
         imgC, imgH, imgW = self.cls_image_shape
         h = img.shape[0]
@@ -83,7 +85,9 @@ class TextClassifier(object):
         cls_res = [['', 0.0]] * img_num
         batch_num = self.cls_batch_num
         elapse = 0
+        self.cls_times.total_time.start()
         for beg_img_no in range(0, img_num, batch_num):
             end_img_no = min(img_num, beg_img_no + batch_num)
             norm_img_batch = []
             max_wh_ratio = 0
@@ -91,6 +95,7 @@ class TextClassifier(object):
                 h, w = img_list[indices[ino]].shape[0:2]
                 wh_ratio = w * 1.0 / h
                 max_wh_ratio = max(max_wh_ratio, wh_ratio)
+            self.cls_times.preprocess_time.start()
             for ino in range(beg_img_no, end_img_no):
                 norm_img = self.resize_norm_img(img_list[indices[ino]])
                 norm_img = norm_img[np.newaxis, :]
@@ -98,11 +103,17 @@ class TextClassifier(object):
             norm_img_batch = np.concatenate(norm_img_batch)
             norm_img_batch = norm_img_batch.copy()
             starttime = time.time()
+            self.cls_times.preprocess_time.end()
+            self.cls_times.inference_time.start()
             self.input_tensor.copy_from_cpu(norm_img_batch)
             self.predictor.run()
             prob_out = self.output_tensors[0].copy_to_cpu()
+            self.cls_times.inference_time.end()
+            self.cls_times.postprocess_time.start()
             self.predictor.try_shrink_memory()
             cls_result = self.postprocess_op(prob_out)
+            self.cls_times.postprocess_time.end()
             elapse += time.time() - starttime
             for rno in range(len(cls_result)):
                 label, score = cls_result[rno]
@@ -110,6 +121,9 @@ class TextClassifier(object):
                 if '180' in label and score > self.cls_thresh:
                     img_list[indices[beg_img_no + rno]] = cv2.rotate(
                         img_list[indices[beg_img_no + rno]], 1)
+        self.cls_times.total_time.end()
+        self.cls_times.img_num += img_num
+        elapse = self.cls_times.total_time.value()
         return img_list, cls_res, elapse
@@ -141,8 +155,9 @@ def main(args):
     for ino in range(len(img_list)):
         logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
                                                cls_res[ino]))
-    logger.info("Total predict time for {} images, cost: {:.3f}".format(
-        len(img_list), predict_time))
+    logger.info(
+        "The predict time about text angle classify module is as follows: ")
+    text_classifier.cls_times.info(average=False)

 if __name__ == "__main__":
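The cls_times member introduced above follows the stage-timer pattern that this commit adds to tools/infer/utility.py (see the Times/Timer classes in that diff further down): each stage is bracketed by start()/end() calls and img_num is accumulated, so per-stage latency can be reported afterwards. A schematic sketch of the pattern, with placeholder functions standing in for the real classifier steps:

# Schematic sketch of the stage-timing pattern; 'batches', 'preprocess',
# 'infer' and 'postprocess' are placeholders, not names from this commit.
import tools.infer.utility as utility

times = utility.Timer()
times.total_time.start()
for batch in batches:               # placeholder iterable of image batches
    times.preprocess_time.start()
    inputs = preprocess(batch)      # placeholder function
    times.preprocess_time.end()

    times.inference_time.start()
    outputs = infer(inputs)         # placeholder function
    times.inference_time.end()

    times.postprocess_time.start()
    results = postprocess(outputs)  # placeholder function
    times.postprocess_time.end()
    times.img_num += len(batch)
times.total_time.end()
times.info(average=False)           # print the accumulated per-stage latency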
tools/infer/predict_det.py:

@@ -31,6 +31,8 @@ from ppocr.utils.utility import get_image_file_list, check_and_read_gif
 from ppocr.data import create_operators, transform
 from ppocr.postprocess import build_post_process
+# import tools.infer.benchmark_utils as benchmark_utils

 logger = get_logger()
@@ -41,7 +43,7 @@ class TextDetector(object):
         pre_process_list = [{
             'DetResizeForTest': {
                 'limit_side_len': args.det_limit_side_len,
-                'limit_type': args.det_limit_type
+                'limit_type': args.det_limit_type,
             }
         }, {
             'NormalizeImage': {
@@ -95,9 +97,8 @@ class TextDetector(object):
         self.preprocess_op = create_operators(pre_process_list)
         self.postprocess_op = build_post_process(postprocess_params)
-        self.predictor, self.input_tensor, self.output_tensors = utility.create_predictor(
-            args, 'det', logger)  # paddle.jit.load(args.det_model_dir)
-        # self.predictor.eval()
+        self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor(
+            args, 'det', logger)

     def order_points_clockwise(self, pts):
         """
@@ -155,6 +156,8 @@ class TextDetector(object):
     def __call__(self, img):
         ori_im = img.copy()
         data = {'image': img}
+
+        st = time.time()
         data = transform(data, self.preprocess_op)
         img, shape_list = data
         if img is None:
@@ -162,7 +165,6 @@ class TextDetector(object):
         img = np.expand_dims(img, axis=0)
         shape_list = np.expand_dims(shape_list, axis=0)
         img = img.copy()
-        starttime = time.time()
         self.input_tensor.copy_from_cpu(img)
         self.predictor.run()
@@ -184,6 +186,7 @@ class TextDetector(object):
             preds['maps'] = outputs[0]
         else:
             raise NotImplementedError
+
         self.predictor.try_shrink_memory()
         post_result = self.postprocess_op(preds, shape_list)
         dt_boxes = post_result[0]['points']
@@ -191,8 +194,9 @@ class TextDetector(object):
             dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape)
         else:
             dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
-        elapse = time.time() - starttime
-        return dt_boxes, elapse
+        et = time.time()
+        return dt_boxes, et - st

 if __name__ == "__main__":
@@ -202,6 +206,14 @@ if __name__ == "__main__":
     count = 0
     total_time = 0
     draw_img_save = "./inference_results"
+
+    if args.warmup:
+        img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
+        for i in range(10):
+            res = text_detector(img)
+
+    cpu_mem, gpu_mem, gpu_util = 0, 0, 0
+
     if not os.path.exists(draw_img_save):
         os.makedirs(draw_img_save)
     for image_file in image_file_list:
@@ -211,10 +223,13 @@ if __name__ == "__main__":
         if img is None:
             logger.info("error in loading image:{}".format(image_file))
             continue
-        dt_boxes, elapse = text_detector(img)
+        st = time.time()
+        dt_boxes, _ = text_detector(img)
+        elapse = time.time() - st
         if count > 0:
             total_time += elapse
         count += 1
         logger.info("Predict time of {}: {}".format(image_file, elapse))
         src_im = utility.draw_text_det_res(dt_boxes, image_file)
         img_name_pure = os.path.split(image_file)[-1]
@@ -222,5 +237,3 @@ if __name__ == "__main__":
                                 "det_res_{}".format(img_name_pure))
         cv2.imwrite(img_path, src_im)
         logger.info("The visualized image saved in {}".format(img_path))
-    if count > 1:
-        logger.info("Avg Time: {}".format(total_time / (count - 1)))
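The --warmup branch added above runs the detector ten times on a random 640x640 image before any timing starts, so one-off costs such as CUDA context creation or TensorRT engine building do not distort the per-image numbers; for the same reason the first timed image is excluded from total_time (the count > 0 check). The warm-up idea in isolation (shape and iteration count as in the diff; text_detector is an already constructed TextDetector):

import numpy as np

# Warm-up: run the predictor on dummy data before measuring real images.
if args.warmup:
    img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
    for i in range(10):
        res = text_detector(img)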
tools/infer/predict_rec.py:

@@ -28,6 +28,7 @@ import traceback
 import paddle

 import tools.infer.utility as utility
+import tools.infer.benchmark_utils as benchmark_utils
 from ppocr.postprocess import build_post_process
 from ppocr.utils.logging import get_logger
 from ppocr.utils.utility import get_image_file_list, check_and_read_gif
@@ -41,7 +42,6 @@ class TextRecognizer(object):
         self.character_type = args.rec_char_type
         self.rec_batch_num = args.rec_batch_num
         self.rec_algorithm = args.rec_algorithm
-        self.max_text_length = args.max_text_length
         postprocess_params = {
             'name': 'CTCLabelDecode',
             "character_type": args.rec_char_type,
@@ -63,9 +63,11 @@ class TextRecognizer(object):
             "use_space_char": args.use_space_char
         }
         self.postprocess_op = build_post_process(postprocess_params)
-        self.predictor, self.input_tensor, self.output_tensors = \
+        self.predictor, self.input_tensor, self.output_tensors, self.config = \
             utility.create_predictor(args, 'rec', logger)
+        self.rec_times = utility.Timer()

     def resize_norm_img(self, img, max_wh_ratio):
         imgC, imgH, imgW = self.rec_image_shape
         assert imgC == img.shape[2]
@@ -166,17 +168,15 @@ class TextRecognizer(object):
             width_list.append(img.shape[1] / float(img.shape[0]))
         # Sorting can speed up the recognition process
         indices = np.argsort(np.array(width_list))
-        # rec_res = []
+        self.rec_times.total_time.start()
         rec_res = [['', 0.0]] * img_num
         batch_num = self.rec_batch_num
-        elapse = 0
         for beg_img_no in range(0, img_num, batch_num):
             end_img_no = min(img_num, beg_img_no + batch_num)
             norm_img_batch = []
             max_wh_ratio = 0
+            self.rec_times.preprocess_time.start()
             for ino in range(beg_img_no, end_img_no):
-                # h, w = img_list[ino].shape[0:2]
                 h, w = img_list[indices[ino]].shape[0:2]
                 wh_ratio = w * 1.0 / h
                 max_wh_ratio = max(max_wh_ratio, wh_ratio)
@@ -187,9 +187,8 @@ class TextRecognizer(object):
                     norm_img = norm_img[np.newaxis, :]
                     norm_img_batch.append(norm_img)
                 else:
-                    norm_img = self.process_image_srn(img_list[indices[ino]],
-                                                      self.rec_image_shape, 8,
-                                                      self.max_text_length)
+                    norm_img = self.process_image_srn(
+                        img_list[indices[ino]], self.rec_image_shape, 8, 25)
                     encoder_word_pos_list = []
                     gsrm_word_pos_list = []
                     gsrm_slf_attn_bias1_list = []
@@ -203,7 +202,6 @@ class TextRecognizer(object):
             norm_img_batch = norm_img_batch.copy()

             if self.rec_algorithm == "SRN":
-                starttime = time.time()
                 encoder_word_pos_list = np.concatenate(encoder_word_pos_list)
                 gsrm_word_pos_list = np.concatenate(gsrm_word_pos_list)
                 gsrm_slf_attn_bias1_list = np.concatenate(
@@ -218,19 +216,23 @@ class TextRecognizer(object):
                     gsrm_slf_attn_bias1_list,
                     gsrm_slf_attn_bias2_list,
                 ]
+                self.rec_times.preprocess_time.end()
+                self.rec_times.inference_time.start()
                 input_names = self.predictor.get_input_names()
                 for i in range(len(input_names)):
                     input_tensor = self.predictor.get_input_handle(input_names[
                         i])
                     input_tensor.copy_from_cpu(inputs[i])
                 self.predictor.run()
+                self.rec_times.inference_time.end()
                 outputs = []
                 for output_tensor in self.output_tensors:
                     output = output_tensor.copy_to_cpu()
                     outputs.append(output)
                 preds = {"predict": outputs[2]}
             else:
-                starttime = time.time()
+                self.rec_times.preprocess_time.end()
+                self.rec_times.inference_time.start()
                 self.input_tensor.copy_from_cpu(norm_img_batch)
                 self.predictor.run()
@@ -239,22 +241,33 @@ class TextRecognizer(object):
                     output = output_tensor.copy_to_cpu()
                     outputs.append(output)
                 preds = outputs[0]
+                self.predictor.try_shrink_memory()
+                self.rec_times.inference_time.end()
+            self.rec_times.postprocess_time.start()
             rec_result = self.postprocess_op(preds)
             for rno in range(len(rec_result)):
                 rec_res[indices[beg_img_no + rno]] = rec_result[rno]
-            elapse += time.time() - starttime
-        return rec_res, elapse
+            self.rec_times.postprocess_time.end()
+            self.rec_times.img_num += int(norm_img_batch.shape[0])
+        self.rec_times.total_time.end()
+        return rec_res, self.rec_times.total_time.value()

 def main(args):
     image_file_list = get_image_file_list(args.image_dir)
     text_recognizer = TextRecognizer(args)
-    total_run_time = 0.0
-    total_images_num = 0
     valid_image_file_list = []
     img_list = []
-    for idx, image_file in enumerate(image_file_list):
+
+    # warmup 10 times
+    if args.warmup:
+        img = np.random.uniform(0, 255, [32, 320, 3]).astype(np.uint8)
+        for i in range(10):
+            res = text_recognizer([img])
+
+    cpu_mem, gpu_mem, gpu_util = 0, 0, 0
+    count = 0
+    for image_file in image_file_list:
         img, flag = check_and_read_gif(image_file)
         if not flag:
             img = cv2.imread(image_file)
@@ -263,29 +276,54 @@ def main(args):
             continue
         valid_image_file_list.append(image_file)
         img_list.append(img)
-        if len(img_list) >= args.rec_batch_num or idx == len(
-                image_file_list) - 1:
-            try:
-                rec_res, predict_time = text_recognizer(img_list)
-                total_run_time += predict_time
-            except:
-                logger.info(traceback.format_exc())
-                logger.info(
-                    "ERROR!!!! \n"
-                    "Please read the FAQ:https://github.com/PaddlePaddle/PaddleOCR#faq \n"
-                    "If your model has tps module: "
-                    "TPS does not support variable shape.\n"
-                    "Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ")
-                exit()
-            for ino in range(len(img_list)):
-                logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
-                                                       rec_res[ino]))
-            total_images_num += len(valid_image_file_list)
-            valid_image_file_list = []
-            img_list = []
-    logger.info("Total predict time for {} images, cost: {:.3f}".format(
-        total_images_num, total_run_time))
+    try:
+        rec_res, _ = text_recognizer(img_list)
+        if args.benchmark:
+            cm, gm, gu = utility.get_current_memory_mb(0)
+            cpu_mem += cm
+            gpu_mem += gm
+            gpu_util += gu
+            count += 1
+    except Exception as E:
+        logger.info(traceback.format_exc())
+        logger.info(E)
+        exit()
+    for ino in range(len(img_list)):
+        logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
+                                               rec_res[ino]))
+    if args.benchmark:
+        mems = {
+            'cpu_rss_mb': cpu_mem / count,
+            'gpu_rss_mb': gpu_mem / count,
+            'gpu_util': gpu_util * 100 / count
+        }
+    else:
+        mems = None
+    logger.info("The predict time about recognizer module is as follows: ")
+    rec_time_dict = text_recognizer.rec_times.report(average=True)
+    rec_model_name = args.rec_model_dir
+    if args.benchmark:
+        # construct log information
+        model_info = {
+            'model_name': args.rec_model_dir.split('/')[-1],
+            'precision': args.precision
+        }
+        data_info = {
+            'batch_size': args.rec_batch_num,
+            'shape': 'dynamic_shape',
+            'data_num': rec_time_dict['img_num']
+        }
+        perf_info = {
+            'preprocess_time_s': rec_time_dict['preprocess_time'],
+            'inference_time_s': rec_time_dict['inference_time'],
+            'postprocess_time_s': rec_time_dict['postprocess_time'],
+            'total_time_s': rec_time_dict['total_time']
+        }
+        benchmark_log = benchmark_utils.PaddleInferBenchmark(
+            text_recognizer.config, model_info, data_info, perf_info, mems)
+        benchmark_log("Rec")

 if __name__ == "__main__":
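Note the calling-convention change that runs through this file and the two before it: utility.create_predictor() now returns the paddle.inference.Config it built as a fourth value, so callers can hand it to PaddleInferBenchmark later. A sketch of the two unpacking styles used in these diffs:

import tools.infer.utility as utility

# Keep the config for later benchmark reporting (predict_det / predict_rec style).
predictor, input_tensor, output_tensors, config = utility.create_predictor(
    args, 'rec', logger)

# Discard it when no report is needed (predict_cls style).
predictor, input_tensor, output_tensors, _ = utility.create_predictor(
    args, 'cls', logger)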
tools/infer/predict_system.py:

@@ -25,6 +25,7 @@ import cv2
 import copy
 import numpy as np
 import time
+import logging
 from PIL import Image
 import tools.infer.utility as utility
 import tools.infer.predict_rec as predict_rec
@@ -32,13 +33,16 @@ import tools.infer.predict_det as predict_det
 import tools.infer.predict_cls as predict_cls
 from ppocr.utils.utility import get_image_file_list, check_and_read_gif
 from ppocr.utils.logging import get_logger
-from tools.infer.utility import draw_ocr_box_txt
+from tools.infer.utility import draw_ocr_box_txt, get_current_memory_mb
+import tools.infer.benchmark_utils as benchmark_utils

 logger = get_logger()

 class TextSystem(object):
     def __init__(self, args):
+        if not args.show_log:
+            logger.setLevel(logging.INFO)
         self.text_detector = predict_det.TextDetector(args)
         self.text_recognizer = predict_rec.TextRecognizer(args)
         self.use_angle_cls = args.use_angle_cls
@@ -85,10 +89,11 @@ class TextSystem(object):
             cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop_list[bno])
             logger.info(bno, rec_res[bno])

-    def __call__(self, img):
+    def __call__(self, img, cls=True):
         ori_im = img.copy()
         dt_boxes, elapse = self.text_detector(img)
-        logger.info("dt_boxes num : {}, elapse : {}".format(
+        logger.debug("dt_boxes num : {}, elapse : {}".format(
             len(dt_boxes), elapse))
         if dt_boxes is None:
             return None, None
@@ -100,14 +105,14 @@ class TextSystem(object):
             tmp_box = copy.deepcopy(dt_boxes[bno])
             img_crop = self.get_rotate_crop_image(ori_im, tmp_box)
             img_crop_list.append(img_crop)
-        if self.use_angle_cls:
+        if self.use_angle_cls and cls:
             img_crop_list, angle_list, elapse = self.text_classifier(
                 img_crop_list)
-            logger.info("cls num : {}, elapse : {}".format(
+            logger.debug("cls num : {}, elapse : {}".format(
                 len(img_crop_list), elapse))

         rec_res, elapse = self.text_recognizer(img_crop_list)
-        logger.info("rec_res num : {}, elapse : {}".format(
+        logger.debug("rec_res num : {}, elapse : {}".format(
             len(rec_res), elapse))
         # self.print_draw_crop_rec_res(img_crop_list, rec_res)
         filter_boxes, filter_rec_res = [], []
@@ -147,7 +152,19 @@ def main(args):
     is_visualize = True
     font_path = args.vis_font_path
     drop_score = args.drop_score
-    for image_file in image_file_list:
+
+    # warm up 10 times
+    if args.warmup:
+        img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
+        for i in range(10):
+            res = text_sys(img)
+
+    total_time = 0
+    cpu_mem, gpu_mem, gpu_util = 0, 0, 0
+    _st = time.time()
+    count = 0
+    for idx, image_file in enumerate(image_file_list):
         img, flag = check_and_read_gif(image_file)
         if not flag:
             img = cv2.imread(image_file)
@@ -157,8 +174,16 @@ def main(args):
         starttime = time.time()
         dt_boxes, rec_res = text_sys(img)
         elapse = time.time() - starttime
-        logger.info("Predict time of %s: %.3fs" % (image_file, elapse))
+        total_time += elapse
+        if args.benchmark and idx % 20 == 0:
+            cm, gm, gu = get_current_memory_mb(0)
+            cpu_mem += cm
+            gpu_mem += gm
+            gpu_util += gu
+            count += 1
+
+        logger.info(
+            str(idx) + "  Predict time of %s: %.3fs" % (image_file, elapse))
         for text, score in rec_res:
             logger.info("{}, {:.3f}".format(text, score))
@@ -178,12 +203,74 @@ def main(args):
         draw_img_save = "./inference_results/"
         if not os.path.exists(draw_img_save):
             os.makedirs(draw_img_save)
+        if flag:
+            image_file = image_file[:-3] + "png"
         cv2.imwrite(
             os.path.join(draw_img_save, os.path.basename(image_file)),
             draw_img[:, :, ::-1])
         logger.info("The visualized image saved in {}".format(
             os.path.join(draw_img_save, os.path.basename(image_file))))
-    logger.info("The predict total time is {}".format(time.time() - _st))
+
+    logger.info("\nThe predict total time is {}".format(total_time))
+    img_num = text_sys.text_detector.det_times.img_num
+    if args.benchmark:
+        mems = {
+            'cpu_rss_mb': cpu_mem / count,
+            'gpu_rss_mb': gpu_mem / count,
+            'gpu_util': gpu_util * 100 / count
+        }
+    else:
+        mems = None
+    det_time_dict = text_sys.text_detector.det_times.report(average=True)
+    rec_time_dict = text_sys.text_recognizer.rec_times.report(average=True)
+    det_model_name = args.det_model_dir
+    rec_model_name = args.rec_model_dir
+
+    # construct det log information
+    model_info = {
+        'model_name': args.det_model_dir.split('/')[-1],
+        'precision': args.precision
+    }
+    data_info = {
+        'batch_size': 1,
+        'shape': 'dynamic_shape',
+        'data_num': det_time_dict['img_num']
+    }
+    perf_info = {
+        'preprocess_time_s': det_time_dict['preprocess_time'],
+        'inference_time_s': det_time_dict['inference_time'],
+        'postprocess_time_s': det_time_dict['postprocess_time'],
+        'total_time_s': det_time_dict['total_time']
+    }
+    benchmark_log = benchmark_utils.PaddleInferBenchmark(
+        text_sys.text_detector.config, model_info, data_info, perf_info, mems,
+        args.save_log_path)
+    benchmark_log("Det")
+
+    # construct rec log information
+    model_info = {
+        'model_name': args.rec_model_dir.split('/')[-1],
+        'precision': args.precision
+    }
+    data_info = {
+        'batch_size': args.rec_batch_num,
+        'shape': 'dynamic_shape',
+        'data_num': rec_time_dict['img_num']
+    }
+    perf_info = {
+        'preprocess_time_s': rec_time_dict['preprocess_time'],
+        'inference_time_s': rec_time_dict['inference_time'],
+        'postprocess_time_s': rec_time_dict['postprocess_time'],
+        'total_time_s': rec_time_dict['total_time']
+    }
+    benchmark_log = benchmark_utils.PaddleInferBenchmark(
+        text_sys.text_recognizer.config, model_info, data_info, perf_info,
+        mems, args.save_log_path)
+    benchmark_log("Rec")

 if __name__ == "__main__":
     args = utility.parse_args()
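The new cls argument on TextSystem.__call__ lets a caller skip the 180-degree angle classifier per call, even when the system was built with use_angle_cls=True, for inputs that are known to be upright. A short usage sketch (text_sys is a constructed TextSystem and img a BGR numpy image):

# Default: detection, optional angle classification, recognition.
dt_boxes, rec_res = text_sys(img)

# Skip the angle classifier for this call only; detection and recognition still run.
dt_boxes, rec_res = text_sys(img, cls=False)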
tools/infer/utility.py:

@@ -21,18 +21,23 @@ import json
 from PIL import Image, ImageDraw, ImageFont
 import math
 from paddle import inference
+import time
+from ppocr.utils.logging import get_logger
+
+logger = get_logger()

 def str2bool(v):
     return v.lower() in ("true", "t", "1")

-def parse_args():
+def init_args():
     parser = argparse.ArgumentParser()
     # params for prediction engine
     parser.add_argument("--use_gpu", type=str2bool, default=True)
     parser.add_argument("--ir_optim", type=str2bool, default=True)
     parser.add_argument("--use_tensorrt", type=str2bool, default=False)
-    parser.add_argument("--use_fp16", type=str2bool, default=False)
+    parser.add_argument("--precision", type=str, default="fp32")
     parser.add_argument("--gpu_mem", type=int, default=500)

     # params for text detector
@@ -98,15 +103,97 @@ def parse_args():
     parser.add_argument("--cls_thresh", type=float, default=0.9)

     parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
+    parser.add_argument("--cpu_threads", type=int, default=10)
     parser.add_argument("--use_pdserving", type=str2bool, default=False)
+    parser.add_argument("--warmup", type=str2bool, default=True)

+    # multi-process
     parser.add_argument("--use_mp", type=str2bool, default=False)
     parser.add_argument("--total_process_num", type=int, default=1)
     parser.add_argument("--process_id", type=int, default=0)
+
+    parser.add_argument("--benchmark", type=bool, default=False)
+    parser.add_argument("--save_log_path", type=str, default="./log_output/")
+
+    parser.add_argument("--show_log", type=str2bool, default=True)
+
+    return parser
+
+
+def parse_args():
+    parser = init_args()
     return parser.parse_args()
+
+
+class Times(object):
+    def __init__(self):
+        self.time = 0.
+        self.st = 0.
+        self.et = 0.
+
+    def start(self):
+        self.st = time.time()
+
+    def end(self, accumulative=True):
+        self.et = time.time()
+        if accumulative:
+            self.time += self.et - self.st
+        else:
+            self.time = self.et - self.st
+
+    def reset(self):
+        self.time = 0.
+        self.st = 0.
+        self.et = 0.
+
+    def value(self):
+        return round(self.time, 4)
+
+
+class Timer(Times):
+    def __init__(self):
+        super(Timer, self).__init__()
+        self.total_time = Times()
+        self.preprocess_time = Times()
+        self.inference_time = Times()
+        self.postprocess_time = Times()
+        self.img_num = 0
+
+    def info(self, average=False):
+        logger.info("----------------------- Perf info -----------------------")
+        logger.info("total_time: {}, img_num: {}".format(self.total_time.value(
+        ), self.img_num))
+        preprocess_time = round(self.preprocess_time.value() / self.img_num,
+                                4) if average else self.preprocess_time.value()
+        postprocess_time = round(
+            self.postprocess_time.value() / self.img_num,
+            4) if average else self.postprocess_time.value()
+        inference_time = round(self.inference_time.value() / self.img_num,
+                               4) if average else self.inference_time.value()
+
+        average_latency = self.total_time.value() / self.img_num
+        logger.info("average_latency(ms): {:.2f}, QPS: {:2f}".format(
+            average_latency * 1000, 1 / average_latency))
+        logger.info(
+            "preprocess_latency(ms): {:.2f}, inference_latency(ms): {:.2f}, postprocess_latency(ms): {:.2f}".
+            format(preprocess_time * 1000, inference_time * 1000,
+                   postprocess_time * 1000))
+
+    def report(self, average=False):
+        dic = {}
+        dic['preprocess_time'] = round(
+            self.preprocess_time.value() / self.img_num,
+            4) if average else self.preprocess_time.value()
+        dic['postprocess_time'] = round(
+            self.postprocess_time.value() / self.img_num,
+            4) if average else self.postprocess_time.value()
+        dic['inference_time'] = round(
+            self.inference_time.value() / self.img_num,
+            4) if average else self.inference_time.value()
+        dic['img_num'] = self.img_num
+        dic['total_time'] = round(self.total_time.value(), 4)
+        return dic

 def create_predictor(args, mode, logger):
     if mode == "det":
         model_dir = args.det_model_dir
@@ -114,6 +201,8 @@ def create_predictor(args, mode, logger):
         model_dir = args.cls_model_dir
     elif mode == 'rec':
         model_dir = args.rec_model_dir
+    elif mode == 'table':
+        model_dir = args.table_model_dir
     else:
         model_dir = args.e2e_model_dir
@@ -131,30 +220,121 @@ def create_predictor(args, mode, logger):
     config = inference.Config(model_file_path, params_file_path)

+    if hasattr(args, 'precision'):
+        if args.precision == "fp16" and args.use_tensorrt:
+            precision = inference.PrecisionType.Half
+        elif args.precision == "int8":
+            precision = inference.PrecisionType.Int8
+        else:
+            precision = inference.PrecisionType.Float32
+    else:
+        precision = inference.PrecisionType.Float32
+
     if args.use_gpu:
         config.enable_use_gpu(args.gpu_mem, 0)
         if args.use_tensorrt:
             config.enable_tensorrt_engine(
-                precision_mode=inference.PrecisionType.Half
-                if args.use_fp16 else inference.PrecisionType.Float32,
-                max_batch_size=args.max_batch_size)
+                precision_mode=inference.PrecisionType.Float32,
+                max_batch_size=args.max_batch_size,
+                min_subgraph_size=3)  # skip the minmum trt subgraph
+            if mode == "det" and "mobile" in model_file_path:
+                min_input_shape = {
+                    "x": [1, 3, 50, 50],
+                    "conv2d_92.tmp_0": [1, 96, 20, 20],
+                    "conv2d_91.tmp_0": [1, 96, 10, 10],
+                    "nearest_interp_v2_1.tmp_0": [1, 96, 10, 10],
+                    "nearest_interp_v2_2.tmp_0": [1, 96, 20, 20],
+                    "nearest_interp_v2_3.tmp_0": [1, 24, 20, 20],
+                    "nearest_interp_v2_4.tmp_0": [1, 24, 20, 20],
+                    "nearest_interp_v2_5.tmp_0": [1, 24, 20, 20],
+                    "elementwise_add_7": [1, 56, 2, 2],
+                    "nearest_interp_v2_0.tmp_0": [1, 96, 2, 2]
+                }
+                max_input_shape = {
+                    "x": [1, 3, 2000, 2000],
+                    "conv2d_92.tmp_0": [1, 96, 400, 400],
+                    "conv2d_91.tmp_0": [1, 96, 200, 200],
+                    "nearest_interp_v2_1.tmp_0": [1, 96, 200, 200],
+                    "nearest_interp_v2_2.tmp_0": [1, 96, 400, 400],
+                    "nearest_interp_v2_3.tmp_0": [1, 24, 400, 400],
+                    "nearest_interp_v2_4.tmp_0": [1, 24, 400, 400],
+                    "nearest_interp_v2_5.tmp_0": [1, 24, 400, 400],
+                    "elementwise_add_7": [1, 56, 400, 400],
+                    "nearest_interp_v2_0.tmp_0": [1, 96, 400, 400]
+                }
+                opt_input_shape = {
+                    "x": [1, 3, 640, 640],
+                    "conv2d_92.tmp_0": [1, 96, 160, 160],
+                    "conv2d_91.tmp_0": [1, 96, 80, 80],
+                    "nearest_interp_v2_1.tmp_0": [1, 96, 80, 80],
+                    "nearest_interp_v2_2.tmp_0": [1, 96, 160, 160],
+                    "nearest_interp_v2_3.tmp_0": [1, 24, 160, 160],
+                    "nearest_interp_v2_4.tmp_0": [1, 24, 160, 160],
+                    "nearest_interp_v2_5.tmp_0": [1, 24, 160, 160],
+                    "elementwise_add_7": [1, 56, 40, 40],
+                    "nearest_interp_v2_0.tmp_0": [1, 96, 40, 40]
+                }
+            if mode == "det" and "server" in model_file_path:
+                min_input_shape = {
+                    "x": [1, 3, 50, 50],
+                    "conv2d_59.tmp_0": [1, 96, 20, 20],
+                    "nearest_interp_v2_2.tmp_0": [1, 96, 20, 20],
+                    "nearest_interp_v2_3.tmp_0": [1, 24, 20, 20],
+                    "nearest_interp_v2_4.tmp_0": [1, 24, 20, 20],
+                    "nearest_interp_v2_5.tmp_0": [1, 24, 20, 20]
+                }
+                max_input_shape = {
+                    "x": [1, 3, 2000, 2000],
+                    "conv2d_59.tmp_0": [1, 96, 400, 400],
+                    "nearest_interp_v2_2.tmp_0": [1, 96, 400, 400],
+                    "nearest_interp_v2_3.tmp_0": [1, 24, 400, 400],
+                    "nearest_interp_v2_4.tmp_0": [1, 24, 400, 400],
+                    "nearest_interp_v2_5.tmp_0": [1, 24, 400, 400]
+                }
+                opt_input_shape = {
+                    "x": [1, 3, 640, 640],
+                    "conv2d_59.tmp_0": [1, 96, 160, 160],
+                    "nearest_interp_v2_2.tmp_0": [1, 96, 160, 160],
+                    "nearest_interp_v2_3.tmp_0": [1, 24, 160, 160],
+                    "nearest_interp_v2_4.tmp_0": [1, 24, 160, 160],
+                    "nearest_interp_v2_5.tmp_0": [1, 24, 160, 160]
+                }
+            elif mode == "rec":
+                min_input_shape = {"x": [args.rec_batch_num, 3, 32, 10]}
+                max_input_shape = {"x": [args.rec_batch_num, 3, 32, 2000]}
+                opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]}
+            elif mode == "cls":
+                min_input_shape = {"x": [args.rec_batch_num, 3, 48, 10]}
+                max_input_shape = {"x": [args.rec_batch_num, 3, 48, 2000]}
+                opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]}
+            else:
+                min_input_shape = {"x": [1, 3, 10, 10]}
+                max_input_shape = {"x": [1, 3, 1000, 1000]}
+                opt_input_shape = {"x": [1, 3, 500, 500]}
+            config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
+                                              opt_input_shape)
+
     else:
         config.disable_gpu()
-        config.set_cpu_math_library_num_threads(6)
+        if hasattr(args, "cpu_threads"):
+            config.set_cpu_math_library_num_threads(args.cpu_threads)
+        else:
+            # default cpu threads as 10
+            config.set_cpu_math_library_num_threads(10)
         if args.enable_mkldnn:
             # cache 10 different shapes for mkldnn to avoid memory leak
             config.set_mkldnn_cache_capacity(10)
             config.enable_mkldnn()
+            # TODO LDOUBLEV: fix mkldnn bug when bach_size > 1
+            #config.set_mkldnn_op({'conv2d', 'depthwise_conv2d', 'pool2d', 'batch_norm'})
+            args.rec_batch_num = 1

     # enable memory optim
     config.enable_memory_optim()
     config.disable_glog_info()

     config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
+    if mode == 'table':
+        config.delete_pass("fc_fuse_pass")  # not supported for table
     config.switch_use_feed_fetch_ops(False)
+    config.switch_ir_optim(True)

     # create predictor
     predictor = inference.create_predictor(config)
@@ -166,7 +346,7 @@ def create_predictor(args, mode, logger):
     for output_name in output_names:
         output_tensor = predictor.get_output_handle(output_name)
         output_tensors.append(output_tensor)
-    return predictor, input_tensor, output_tensors
+    return predictor, input_tensor, output_tensors, config

 def draw_e2e_res(dt_boxes, strs, img_path):
@@ -210,7 +390,7 @@ def draw_ocr(image,
              txts=None,
              scores=None,
              drop_score=0.5,
-             font_path="./doc/simfang.ttf"):
+             font_path="./doc/fonts/simfang.ttf"):
     """
     Visualize the results of OCR detection and recognition
     args:
@@ -417,23 +597,30 @@ def draw_boxes(image, boxes, scores=None, drop_score=0.5):
     return image


+def get_current_memory_mb(gpu_id=None):
+    """
+    It is used to Obtain the memory usage of the CPU and GPU during the running of the program.
+    And this function Current program is time-consuming.
+    """
+    import pynvml
+    import psutil
+    import GPUtil
+    pid = os.getpid()
+    p = psutil.Process(pid)
+    info = p.memory_full_info()
+    cpu_mem = info.uss / 1024. / 1024.
+    gpu_mem = 0
+    gpu_percent = 0
+    if gpu_id is not None:
+        GPUs = GPUtil.getGPUs()
+        gpu_load = GPUs[gpu_id].load
+        gpu_percent = gpu_load
+        pynvml.nvmlInit()
+        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
+        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
+        gpu_mem = meminfo.used / 1024. / 1024.
+    return round(cpu_mem, 4), round(gpu_mem, 4), round(gpu_percent, 4)
+
+
 if __name__ == '__main__':
-    test_img = "./doc/test_v2"
-    predict_txt = "./doc/predict.txt"
-    f = open(predict_txt, 'r')
-    data = f.readlines()
-    img_path, anno = data[0].strip().split('\t')
-    img_name = os.path.basename(img_path)
-    img_path = os.path.join(test_img, img_name)
-    image = Image.open(img_path)
-
-    data = json.loads(anno)
-    boxes, txts, scores = [], [], []
-    for dic in data:
-        boxes.append(dic['points'])
-        txts.append(dic['transcription'])
-        scores.append(round(dic['scores'], 3))
-
-    new_img = draw_ocr(image, boxes, txts, scores)
-
-    cv2.imwrite(img_name, new_img)
+    pass
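get_current_memory_mb() reads the resident memory of the current process through psutil and, when a GPU id is given, device memory and load through pynvml and GPUtil, so those three optional packages must be installed before --benchmark is used. A small usage sketch:

from tools.infer.utility import get_current_memory_mb

# CPU-only reading; the GPU fields come back as 0.
cpu_mb, gpu_mb, gpu_util = get_current_memory_mb()

# Reading that also queries GPU 0 (requires pynvml and GPUtil).
cpu_mb, gpu_mb, gpu_util = get_current_memory_mb(0)
print("cpu_rss_mb={}, gpu_rss_mb={}, gpu_util={}".format(cpu_mb, gpu_mb, gpu_util))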
tools/infer_cls.py:

@@ -47,7 +47,7 @@ def main():
     # build model
     model = build_model(config['Architecture'])
-    init_model(config, model, logger)
+    init_model(config, model)

     # create data ops
     transforms = []
tools/infer_det.py:

@@ -61,7 +61,7 @@ def main():
     # build model
     model = build_model(config['Architecture'])
-    init_model(config, model, logger)
+    init_model(config, model)

     # build post process
     post_process_class = build_post_process(config['PostProcess'])
@@ -112,4 +112,4 @@ def main():
 if __name__ == '__main__':
     config, device, logger, vdl_writer = program.preprocess()
-    main()
\ No newline at end of file
+    main()
tools/infer_e2e.py:

@@ -68,7 +68,7 @@ def main():
     # build model
     model = build_model(config['Architecture'])
-    init_model(config, model, logger)
+    init_model(config, model)

     # build post process
     post_process_class = build_post_process(config['PostProcess'],
tools/infer_rec.py:

@@ -20,6 +20,7 @@ import numpy as np
 import os
 import sys
+import json

 __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
@@ -46,12 +47,18 @@ def main():
     # build model
     if hasattr(post_process_class, 'character'):
-        config['Architecture']["Head"]['out_channels'] = len(
-            getattr(post_process_class, 'character'))
+        char_num = len(getattr(post_process_class, 'character'))
+        if config['Architecture']["algorithm"] in ["Distillation",
+                                                   ]:  # distillation model
+            for key in config['Architecture']["Models"]:
+                config['Architecture']["Models"][key]["Head"][
+                    'out_channels'] = char_num
+        else:  # base rec model
+            config['Architecture']["Head"]['out_channels'] = char_num
+
     model = build_model(config['Architecture'])

-    init_model(config, model, logger)
+    init_model(config, model)

     # create data ops
     transforms = []
@@ -107,11 +114,23 @@ def main():
         else:
             preds = model(images)
         post_result = post_process_class(preds)
-        for rec_reuslt in post_result:
-            logger.info('\t result: {}'.format(rec_reuslt))
-            if len(rec_reuslt) >= 2:
-                fout.write(file + "\t" + rec_reuslt[0] + "\t" + str(
-                    rec_reuslt[1]) + "\n")
+        info = None
+        if isinstance(post_result, dict):
+            rec_info = dict()
+            for key in post_result:
+                if len(post_result[key][0]) >= 2:
+                    rec_info[key] = {
+                        "label": post_result[key][0][0],
+                        "score": post_result[key][0][1],
+                    }
+            info = json.dumps(rec_info)
+        else:
+            if len(post_result[0]) >= 2:
+                info = post_result[0][0] + "\t" + str(post_result[0][1])
+
+        if info is not None:
+            logger.info("\t result: {}".format(info))
+            fout.write(file + "\t" + info)
     logger.info("success!")
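With this change the predicts file written by infer_rec.py can contain two shapes of line: a plain label/score pair for a single-head recognizer, or a JSON object keyed by sub-model name for a distillation model. The values below are invented purely to illustrate the two formats:

# Illustration only: the two line formats written to the predicts file.
single_model_line = "word_1.png\tJOINT\t0.9854"  # <img>\t<label>\t<score>
distillation_line = ('word_1.png\t'
                     '{"Student": {"label": "JOINT", "score": 0.98}, '
                     '"Teacher": {"label": "JOINT", "score": 0.97}}')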
tools/infer_table.py
0 → 100644
View file @
253b8453
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

import os
import sys
import json

__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))

os.environ["FLAGS_allocator_strategy"] = 'auto_growth'

import paddle
from paddle.jit import to_static

from ppocr.data import create_operators, transform
from ppocr.modeling.architectures import build_model
from ppocr.postprocess import build_post_process
from ppocr.utils.save_load import init_model
from ppocr.utils.utility import get_image_file_list
import tools.program as program
import cv2


def main(config, device, logger, vdl_writer):
    global_config = config['Global']

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # build model
    if hasattr(post_process_class, 'character'):
        config['Architecture']["Head"]['out_channels'] = len(
            getattr(post_process_class, 'character'))

    model = build_model(config['Architecture'])
    init_model(config, model, logger)

    # create data ops
    transforms = []
    use_padding = False
    for op in config['Eval']['dataset']['transforms']:
        op_name = list(op)[0]
        if 'Label' in op_name:
            continue
        if op_name == 'KeepKeys':
            op[op_name]['keep_keys'] = ['image']
        if op_name == "ResizeTableImage":
            use_padding = True
            padding_max_len = op['ResizeTableImage']['max_len']
        transforms.append(op)

    global_config['infer_mode'] = True
    ops = create_operators(transforms, global_config)

    model.eval()
    for file in get_image_file_list(config['Global']['infer_img']):
        logger.info("infer_img: {}".format(file))
        with open(file, 'rb') as f:
            img = f.read()
            data = {'image': img}
        batch = transform(data, ops)
        images = np.expand_dims(batch[0], axis=0)
        images = paddle.to_tensor(images)
        preds = model(images)
        post_result = post_process_class(preds)
        res_html_code = post_result['res_html_code']
        res_loc = post_result['res_loc']
        img = cv2.imread(file)
        imgh, imgw = img.shape[0:2]
        res_loc_final = []
        for rno in range(len(res_loc[0])):
            x0, y0, x1, y1 = res_loc[0][rno]
            left = max(int(imgw * x0), 0)
            top = max(int(imgh * y0), 0)
            right = min(int(imgw * x1), imgw - 1)
            bottom = min(int(imgh * y1), imgh - 1)
            cv2.rectangle(img, (left, top), (right, bottom), (0, 0, 255), 2)
            res_loc_final.append([left, top, right, bottom])
        res_loc_str = json.dumps(res_loc_final)
        logger.info("result: {}, {}".format(res_html_code, res_loc_final))
    logger.info("success!")


if __name__ == '__main__':
    config, device, logger, vdl_writer = program.preprocess()
    main(config, device, logger, vdl_writer)
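
The cell locations returned by the table post-processor are normalized [x0, y0, x1, y1] fractions, which the script above scales to pixel coordinates and clamps to the image border before drawing. A standalone sketch of just that conversion step (the image size and boxes below are invented example values):

def loc_to_pixel_boxes(res_loc, imgw, imgh):
    # Scale normalized (x0, y0, x1, y1) cell corners to pixels and clamp
    # them inside the image, as infer_table.py does before cv2.rectangle.
    boxes = []
    for x0, y0, x1, y1 in res_loc:
        left = max(int(imgw * x0), 0)
        top = max(int(imgh * y0), 0)
        right = min(int(imgw * x1), imgw - 1)
        bottom = min(int(imgh * y1), imgh - 1)
        boxes.append([left, top, right, bottom])
    return boxes

# Example with made-up values: a 640x480 image and two table cells.
print(loc_to_pixel_boxes([[0.1, 0.2, 0.45, 0.3], [0.5, 0.2, 0.98, 0.3]], 640, 480))
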
tools/program.py
View file @
253b8453
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
...
@@ -186,6 +186,7 @@ def train(config,
     model.train()

     use_srn = config['Architecture']['algorithm'] == "SRN"
+    model_type = config['Architecture']['model_type']

     if 'start_epoch' in best_model_dict:
         start_epoch = best_model_dict['start_epoch']
...
@@ -208,9 +209,9 @@ def train(config,
             lr = optimizer.get_lr()
             images = batch[0]
             if use_srn:
-                others = batch[-4:]
-                preds = model(images, others)
                 model_average = True
+            if use_srn or model_type == 'table':
+                preds = model(images, data=batch[1:])
             else:
                 preds = model(images)
             loss = loss_class(preds, batch)
...
@@ -232,8 +233,11 @@ def train(config,
             if cal_metric_during_train:  # only rec and cls need
                 batch = [item.numpy() for item in batch]
-                post_result = post_process_class(preds, batch[1])
-                eval_class(post_result, batch)
+                if model_type == 'table':
+                    eval_class(preds, batch)
+                else:
+                    post_result = post_process_class(preds, batch[1])
+                    eval_class(post_result, batch)
                 metric = eval_class.get_metric()
                 train_stats.update(metric)
...
@@ -269,6 +273,7 @@ def train(config,
                     valid_dataloader,
                     post_process_class,
                     eval_class,
+                    model_type,
                     use_srn=use_srn)
                 cur_metric_str = 'cur metric, {}'.format(', '.join(
                     ['{}: {}'.format(k, v) for k, v in cur_metric.items()]))
...
@@ -336,7 +341,11 @@ def train(config,
     return


-def eval(model, valid_dataloader, post_process_class, eval_class,
-         use_srn=False):
+def eval(model, valid_dataloader, post_process_class, eval_class,
+         model_type, use_srn=False):
     model.eval()
     with paddle.no_grad():
...
@@ -350,19 +359,19 @@ def eval(model, valid_dataloader, post_process_class, eval_class,
                 break
             images = batch[0]
             start = time.time()
-            if use_srn:
-                others = batch[-4:]
-                preds = model(images, others)
+            if use_srn or model_type == 'table':
+                preds = model(images, data=batch[1:])
             else:
                 preds = model(images)
             batch = [item.numpy() for item in batch]
             # Obtain usable results from post-processing methods
-            post_result = post_process_class(preds, batch[1])
             total_time += time.time() - start
             # Evaluate the results of the current batch
-            eval_class(post_result, batch)
+            if model_type == 'table':
+                eval_class(preds, batch)
+            else:
+                post_result = post_process_class(preds, batch[1])
+                eval_class(post_result, batch)
             pbar.update(1)
             total_frame += len(images)
         # Get final metric,eg. acc or hmean
...
@@ -386,7 +395,7 @@ def preprocess(is_train=False):
     alg = config['Architecture']['algorithm']
     assert alg in [
         'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN',
-        'CLS', 'PGNet'
+        'CLS', 'PGNet', 'Distillation', 'TableAttn'
     ]

     device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu'
...
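
Both the training-time metric hook and eval() now branch on model_type: table models are scored directly on the raw predictions, while everything else goes through the post-processor first. A compact sketch of that dispatch, using stand-in callables in place of the objects the real code builds from the config via build_metric() and build_post_process():

def score_batch(preds, batch, eval_class, post_process_class, model_type):
    # Table structure metrics consume the raw prediction dict; other model
    # types are post-processed into labels/boxes before being scored.
    if model_type == 'table':
        eval_class(preds, batch)
    else:
        post_result = post_process_class(preds, batch[1])
        eval_class(post_result, batch)

# Stand-in objects so the sketch runs on its own.
collected = []
score_batch({'structure_probs': None}, [None, None],
            eval_class=lambda preds, batch: collected.append(preds),
            post_process_class=lambda preds, label: preds,
            model_type='table')
print(collected)
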
tools/train.py
View file @
253b8453
...
@@ -72,7 +72,14 @@ def main(config, device, logger, vdl_writer):
     # for rec algorithm
     if hasattr(post_process_class, 'character'):
         char_num = len(getattr(post_process_class, 'character'))
-        config['Architecture']["Head"]['out_channels'] = char_num
+        if config['Architecture']["algorithm"] in ["Distillation", ]:  # distillation model
+            for key in config['Architecture']["Models"]:
+                config['Architecture']["Models"][key]["Head"]['out_channels'] = char_num
+        else:  # base rec model
+            config['Architecture']["Head"]['out_channels'] = char_num
     model = build_model(config['Architecture'])
     if config['Global']['distributed']:
         model = paddle.DataParallel(model)
...
@@ -90,7 +97,7 @@ def main(config, device, logger, vdl_writer):
     # build metric
     eval_class = build_metric(config['Metric'])
     # load pretrain model
-    pre_best_model_dict = init_model(config, model, logger, optimizer)
+    pre_best_model_dict = init_model(config, model, optimizer)
     logger.info('train dataloader has {} iters'.format(len(train_dataloader)))
     if valid_dataloader is not None:
...
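
As in the inference scripts, init_model() here loses its logger argument; it still returns the checkpoint metadata (pre_best_model_dict), from which program.train() picks up the resume point via the 'start_epoch' key shown in the program.py hunk above. A schematic sketch of that hand-off, with a plain dict standing in for the real checkpoint metadata:

def resolve_start_epoch(best_model_dict, default_epoch=1):
    # program.train() resumes from 'start_epoch' when the restored
    # checkpoint supplied it, otherwise training starts from epoch 1.
    if 'start_epoch' in best_model_dict:
        return best_model_dict['start_epoch']
    return default_epoch

# Made-up checkpoint metadata for illustration only.
print(resolve_start_epoch({'best_acc': 0.81, 'start_epoch': 42}))  # -> 42
print(resolve_start_epoch({}))                                     # -> 1
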